├── .coveragerc
├── .gitattributes
├── .github
    ├── CONTRIBUTING.md
    ├── PULL_REQUEST_TEMPLATE.md
    ├── dependabot.yml
    └── workflows
    │   └── python.yml
├── .gitignore
├── .readthedocs.yaml
├── CHANGELOG.md
├── CODE_OF_CONDUCT.rst
├── Doxyfile.in
├── MANIFEST.in
├── Makefile
├── README.md
├── benchmarks
    ├── faGen.py
    ├── fqGen.py
    ├── fqToFaConvert.py
    ├── mysql
    │   ├── create.py
    │   ├── mdbConstants.py
    │   ├── mydb.py
    │   ├── mysqlCreateTimeit.py
    │   ├── mysqlTimeit.py
    │   └── mysql_login.txt
    ├── pgres
    │   ├── create.py
    │   ├── drop.py
    │   ├── pdbConstants.py
    │   ├── pgdb.py
    │   ├── pgresCreateTimeit.py
    │   ├── pgresTimeit.py
    │   └── pgres_login.txt
    ├── screedCreateTimeit.py
    ├── screedTimeit.py
    └── screedTimeit1M.py
├── bigtests
    └── __init__.py
├── doc
    ├── CODE_OF_CONDUCT.rst
    ├── LICENSE.rst
    ├── Makefile
    ├── _static
    │   ├── labibi.css
    │   └── labibi.js
    ├── conf.py
    ├── dev
    │   ├── coding-guidelines-and-review.rst
    │   ├── index.rst
    │   ├── parsers.rst
    │   └── release-checklist.rst
    ├── example.rst
    ├── index.rst
    ├── release-notes
    │   ├── RELEASE-0.5.rst
    │   ├── RELEASE-0.8.md
    │   ├── RELEASE-0.8.rst
    │   ├── RELEASE-0.9.md
    │   ├── RELEASE-0.9.rst
    │   └── index.rst
    ├── run-doctests.py
    ├── screed.rst
    └── user
    │   └── known-issues.rst
├── legacy
    ├── ChangeLog
    └── jenkins-build.sh
├── pyproject.toml
├── pytest.ini
├── screed
    ├── DBConstants.py
    ├── __init__.py
    ├── __main__.py
    ├── conversion.py
    ├── createscreed.py
    ├── dna.py
    ├── dump_fasta.py
    ├── dump_fastq.py
    ├── fasta.py
    ├── fastq.py
    ├── hava.py
    ├── openscreed.py
    ├── pygr_api.py
    ├── screedRecord.py
    ├── seqparse.py
    ├── tests
    │   ├── __init__.py
    │   ├── havaGen.py
    │   ├── screed_tst_utils.py
    │   ├── test-data
    │   │   ├── empty.fa
    │   │   ├── test-whitespace.fa
    │   │   ├── test.fa
    │   │   ├── test.fa.bz2
    │   │   ├── test.fa.gz
    │   │   ├── test.fa.zip
    │   │   ├── test.fastq
    │   │   ├── test.fastq.bz2
    │   │   ├── test.fastq.gz
    │   │   └── test.hava
    │   ├── test_attriberror.py
    │   ├── test_convert.py
    │   ├── test_db.py
    │   ├── test_dictionary.py
    │   ├── test_dna.py
    │   ├── test_fasta.py
    │   ├── test_fasta_recover.py
    │   ├── test_fastq.py
    │   ├── test_fastq_recover.py
    │   ├── test_hava_methods.py
    │   ├── test_open.py
    │   ├── test_open_cm.py
    │   ├── test_pygr_api.py
    │   ├── test_record.py
    │   ├── test_shell.py
    │   └── test_streaming.py
    └── utils.py
├── setup.cfg
├── setup.py
└── tox.ini


/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = screed/tests/*
3 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | screed/_version.py export-subst
2 | 


--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | See [our development docs](https://screed.readthedocs.io/en/latest/dev/).
2 | 
Be sure to copy and paste the [checklist](https://screed.readthedocs.io/en/latest/dev/coding-guidelines-and-review.html#checklist) into the pull request description.
4 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dib-lab/screed/f290cadb01112510a7aa8f3942238ac617b6d4da/.github/PULL_REQUEST_TEMPLATE.md


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 | - package-ecosystem: pip
 4 |   directory: "/"
 5 |   schedule:
 6 |     interval: daily
 7 |   open-pull-requests-limit: 10
 8 | - package-ecosystem: "github-actions"
 9 |   directory: "/"
10 |   schedule:
11 |     interval: daily
12 | 


--------------------------------------------------------------------------------
/.github/workflows/python.yml:
--------------------------------------------------------------------------------
 1 | name: Python tests
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [latest]
 6 |   pull_request:
 7 |     branches: [latest]
 8 |   schedule:
 9 |     - cron: "0 0 7 * *" # monthly
10 | 
11 | jobs:
12 |   test:
13 |     runs-on: ${{ matrix.os }}
14 |     strategy:
15 |       matrix:
16 |         os: [ubuntu-latest, macos-latest]
17 |         py: ["3.10", 3.9, 3.8]
18 |       fail-fast: false
19 | 
20 |     steps:
21 |       - uses: actions/checkout@v4
22 |         with:
23 |           fetch-depth: 0
24 | 
25 |       - name: Set up Python ${{ matrix.py }}
26 |         uses: actions/setup-python@v5
27 |         with:
28 |           python-version: ${{ matrix.py }}
29 | 
30 |       - name: Get pip cache dir
31 |         id: pip-cache
32 |         run: |
33 |           echo "::set-output name=dir::$(pip cache dir)"
34 | 
35 |       - name: pip cache
36 |         uses: actions/cache@v4
37 |         with:
38 |           path: ${{ steps.pip-cache.outputs.dir }}
39 |           key: ${{ runner.os }}-pip-v2-${{ hashFiles('**/setup.py') }}
40 |           restore-keys: |
41 |             ${{ runner.os }}-pip-v2
42 | 
43 |       - name: Install dependencies
44 |         run: |
45 |           python -m pip install --upgrade pip
46 |           pip install tox tox-gh-actions
47 | 
48 |       - name: tox cache
49 |         uses: actions/cache@v4
50 |         with:
51 |           path: .tox/
52 |           key: ${{ runner.os }}-tox-v2-${{ hashFiles('**/setup.py') }}
53 |           restore-keys: |
54 |             ${{ runner.os }}-tox-v2
55 | 
56 |       - name: Test with tox
57 |         run: tox
58 |         env:
59 |           PYTHONDEVMODE: 1
60 | 
61 |       - name: Upload Python coverage to codecov
62 |         uses: codecov/codecov-action@v3
63 |         with:
64 |             flags: python
65 |             fail_ci_if_error: true
66 |             files: coverage.xml
67 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *~
 3 | *_screed
 4 | *.fa
 5 | *.fastq
 6 | build
 7 | screed.egg-info
 8 | dist
 9 | screed/tests/fa_to_fq
10 | screed/tests/fq_to_fa
11 | Doxyfile
12 | .coverage
13 | coverage.xml
14 | diff-cover.html
15 | doc/doxygen/
16 | env/
17 | htmlcov/
18 | nosetests.xml
19 | pylint_report.txt
20 | .*.swp
21 | MANIFEST
22 | doc/_build/*
23 | screed/version.py
24 | .eggs
25 | .tox
26 | .cache
27 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | # Set the version of Python and other tools you might need
 4 | build:
 5 |   os: ubuntu-22.04
 6 |   tools:
 7 |     python: "3.10"
 8 | 
# Build documentation in the doc/ directory with Sphinx
10 | sphinx:
11 |    configuration: doc/conf.py
12 | 
13 | python:
14 |   install:
15 |     - method: pip
16 |       path: .
17 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Change Log
 2 | All notable changes to the screed project will be documented in this file.
 3 | See [keepachangelog](http://keepachangelog.com/) for more info.
 4 | 
 5 | The screed Python and command-line APIs adhere to
 6 | [Semantic Versioning](http://semver.org/).
 7 | 
 8 | ## [Unreleased]
 9 | ### Added
10 | - screed CLI, with database creation and conversion commands.
11 | - screed.make_db, a simplified way of creating DB using the Python API.
12 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.rst:
--------------------------------------------------------------------------------
 1 | Contributor Code of Conduct
 2 | ===========================
 3 | 
 4 | As contributors and maintainers of this project, we pledge to respect
 5 | all people who contribute through reporting issues, posting feature
 6 | requests, updating documentation, submitting pull requests or patches,
 7 | and other activities.
 8 | 
 9 | We are committed to making participation in this project a
10 | harassment-free experience for everyone, regardless of level of
11 | experience, gender, gender identity and expression, sexual orientation,
12 | disability, personal appearance, body size, race, age, or religion.
13 | 
14 | Examples of unacceptable behavior by participants include the use of
15 | sexual language or imagery, derogatory comments or personal attacks,
16 | trolling, public or private harassment, insults, or other unprofessional
17 | conduct.
18 | 
19 | Project maintainers have the right and responsibility to remove, edit,
20 | or reject comments, commits, code, wiki edits, issues, and other
21 | contributions that are not aligned to this Code of Conduct. Project
22 | maintainers or contributors who do not follow the Code of Conduct may be
23 | removed from the project team.
24 | 
25 | Instances of abusive, harassing, or otherwise unacceptable behavior may
26 | be reported by emailing `khmer-project@idyll.org
27 | <mailto:khmer-project@idyll.org>`__ which only goes to C. Titus Brown and
28 | Michael R. Crusoe. To report an issue involving either of them please email
29 | `Judi Brown Clarke, Ph.D. <mailto:jbc@egr.msu.edu>`__ the Diversity Director
30 | at the BEACON Center for the Study of Evolution in Action, an NSF Center for
31 | Science and Technology.
32 | 
33 | This Code of Conduct is adapted from the `Contributor
34 | Covenant <http://contributor-covenant.org>`__, version 1.0.0, available at
35 | http://contributor-covenant.org/version/1/0/0/
36 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include ChangeLog Makefile README.md
2 | include MANIFEST.in Doxyfile.in
3 | include TODO doc/LICENSE.rst
4 | include screed/version.py
5 | graft screed/tests/test-data
6 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # make pep8 to check for basic Python code compliance
 2 | # make pylint to check Python code for enhanced compliance including naming
 3 | #  and documentation
 4 | # make coverage-report to check coverage of the python scripts by the tests
 5 | 
 6 | PYSOURCES=$(wildcard screed/*.py)
 7 | TESTSOURCES=$(wildcard screed/tests/*.py)
 8 | SOURCES=$(PYSOURCES) setup.py
 9 | 
10 | VERSION=$(shell git describe --tags --dirty | sed s/v//)
11 | all:
12 | 	./setup.py build
13 | 
14 | install: FORCE
15 | 	./setup.py build install
16 | 
17 | install-dependencies: FORCE
18 | 	pip install -e .[all]
19 | 
20 | develop: FORCE
21 | 	./setup.py develop
22 | 
23 | dist: dist/screed-$(VERSION).tar.gz
24 | 
25 | dist/screed-$(VERSION).tar.gz: $(SOURCES)
26 | 	./setup.py sdist
27 | 
28 | clean: FORCE
29 | 	./setup.py clean --all || true
30 | 	rm -rf build/
31 | 	rm -rf coverage-debug .coverage coverage.xml
32 | 	rm -rf doc/_build
33 | 	rm -rf .eggs/ *.egg-info/ .cache/ __pycache__/ *.pyc */*.pyc */*/*.pyc
34 | 
35 | pep8: $(PYSOURCES) $(TESTSOURCES)
36 | 	pycodestyle --exclude=_version.py setup.py screed/
37 | 
38 | pylint: FORCE
39 | 	pylint --msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
40 | 		setup.py screed || true
41 | 
42 | doc: FORCE
43 | 	cd doc && make html
44 | 
45 | test: FORCE
46 | 	pytest
47 | 
48 | FORCE:
49 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # screed -- short read sequence utils in Python.
 2 | 
 3 | [![Documentation](https://readthedocs.org/projects/screed/badge/?version=latest)](http://screed.readthedocs.io/en/latest/)
 4 | <a href="https://pypi.org/project/screed/"><img alt="PyPI" src="https://badge.fury.io/py/screed.svg"></a>
 5 | <a href="https://github.com/dib-lab/screed/blob/latest/doc/LICENSE.rst"><img alt="License: 3-Clause BSD" src="https://img.shields.io/badge/License-BSD%203--Clause-blue.svg"></a>
 6 | ![Python tests](https://github.com/dib-lab/screed/workflows/Python%20tests/badge.svg)
 7 | [![Debian Stable Badge](https://badges.debian.net/badges/debian/stable/python3-screed/version.svg)](https://packages.debian.org/stable/python3-screed)
 8 | [![Debian Testing Badge](https://badges.debian.net/badges/debian/testing/python3-screed/version.svg)](https://packages.debian.org/testing/python3-screed)
 9 | 
10 | The official repository for screed is:
11 | 
12 |    https://github.com/dib-lab/screed
13 | 
See https://screed.readthedocs.io/en/latest/ for docs.
15 | 
16 | Issues are tracked at https://github.com/dib-lab/khmer/issues.
17 | 


--------------------------------------------------------------------------------
/benchmarks/faGen.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright (c) 2016, The Regents of the University of California.
 3 | 
 4 | import sys, os
 5 | import random
 6 | 
 7 | seqLength = (8000, 12000)
 8 | 
 9 | class collectionOFiles(object):
10 |     def __init__(self, baseName, divisions, totalSize):
11 |         self.baseName = baseName
12 |         self.divisions = divisions
13 |         self.totalSize = totalSize
14 | 
15 |         self.fileHandles = {}
16 |         for i in range(0, divisions):
17 |             filename = self.baseName + "_%d" % i
18 |             fh = open(filename, "wb")
19 |             divisor = 2 ** i
20 | 
21 |             self.fileHandles[filename]= (fh, self.totalSize/divisor, 0)
22 | 
23 |     def writeRecord(self, name, description, sequence):
24 |         toRemove = []
25 |         for filename in self.fileHandles:
26 |             file, limit, count = self.fileHandles[filename]
27 |             file.write("%s %s\n%s\n" % (name, description, sequence))
28 |             count += 1
29 |             if count >= limit:
30 |                 file.close()
31 |                 toRemove.append(filename)
32 |             else:
33 |                 self.fileHandles[filename] = (file, limit, count)
34 | 
35 |         for fh in toRemove:
36 |             self.fileHandles.pop(fh)
37 | 
38 |     def finished(self):
39 |         return len(self.fileHandles) == 0
40 | 
def genSeq(min, max):
    """
    Generates a random DNA sequence with min <= length <= max.

    The sequence is wrapped with a newline after every 80 bases; there is
    no trailing newline, the caller terminates the record itself.
    """
    choices = ['A','T','C','G']
    lineWidth = 80
    # randint() is inclusive on both ends, which matches the documented
    # contract (randrange() would never produce a sequence of length max).
    length = random.randint(min, max)
    bases = [random.choice(choices) for _ in range(length)]
    # Slice into full-width lines instead of testing "i % 80 == 0", which
    # used to emit a stray line break right after the very first base.
    lines = ["".join(bases[start:start + lineWidth])
             for start in range(0, length, lineWidth)]
    return "\n".join(lines)
53 | 
def createFastaFiles(filename, size, divisions):
    """
    Fills `divisions` FASTA files, named after `filename`, with randomly
    generated records until every file has reached its record limit
    """
    description="cdna:Genscan chromosome:PPYG2:6_qbl_hap2_random:95622:98297:1"
    writer = collectionOFiles(filename, divisions, size)
    recordNumber = 0
    while True:
        if writer.finished():
            break
        header = ">GENSCAN00%d" % recordNumber
        body = genSeq(seqLength[0], seqLength[1])
        writer.writeRecord(header, description, body)
        recordNumber += 1
64 | 
if __name__ == '__main__':
    # Command line: <filename> <size> <divisions>
    if len(sys.argv) != 4:
        # print() with a single argument behaves the same on Python 2 and 3,
        # whereas the print statement is a syntax error on Python 3.
        print("Usage: <filename> <size> <divisions>")
        sys.exit(1)

    filename = sys.argv[1]
    size = int(sys.argv[2])
    divisions = int(sys.argv[3])

    createFastaFiles(filename, size, divisions)
75 | 


--------------------------------------------------------------------------------
/benchmarks/fqGen.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright (c) 2016, The Regents of the University of California.
 3 | 
 4 | import sys, os
 5 | import random
 6 | 
 7 | seqLength = 37
 8 | 
 9 | class collectionOFiles(object):
10 |     def __init__(self, baseName, divisions, totalSize):
11 |         self.baseName = baseName
12 |         self.divisions = divisions
13 |         self.totalSize = totalSize
14 | 
15 |         self.fileHandles = {}
16 |         for i in range(0, divisions):
17 |             filename = self.baseName + "_%d" % i
18 |             fh = open(filename, "wb")
19 |             divisor = 2 ** i
20 | 
21 |             self.fileHandles[filename]= (fh, self.totalSize/divisor, 0)
22 | 
23 |     def writeRecord(self, name, sequence, quality):
24 |         toRemove = []
25 |         for filename in self.fileHandles:
26 |             file, limit, count = self.fileHandles[filename]
27 |             file.write("%s\n%s\n+\n%s\n" % (name, sequence, quality))
28 |             count += 1
29 |             if count >= limit:
30 |                 file.close()
31 |                 toRemove.append(filename)
32 |             else:
33 |                 self.fileHandles[filename] = (file, limit, count)
34 | 
35 |         for fh in toRemove:
36 |             self.fileHandles.pop(fh)
37 | 
38 |     def finished(self):
39 |         return len(self.fileHandles) == 0
40 | 
41 | 
def genSeq(length):
    """
    Builds a random string of `length` bases drawn from A, T, C and G
    """
    alphabet = ['A','T','C','G']
    # One random.choice() call per base, in order.
    return "".join([random.choice(alphabet) for _ in range(0, length)])
51 | 
def genAcc(length):
    """
    Builds a random quality string of `length` characters
    """
    symbols = ['A','1','7','3','.',';','*','<']
    # One random.choice() call per character, in order.
    return "".join([random.choice(symbols) for _ in range(0, length)])
61 | 
def createFastqFiles(filename, size, divisions):
    """
    Fills `divisions` FASTQ files, named after `filename`, with randomly
    generated records until every file has reached its record limit
    """
    writer = collectionOFiles(filename, divisions, size)
    recordNumber = 0
    while True:
        if writer.finished():
            break
        header = "@HWI-EAS_4_PE-F%d" % recordNumber
        bases = genSeq(seqLength)
        scores = genAcc(seqLength)
        writer.writeRecord(header, bases, scores)
        recordNumber += 1
72 | 
if __name__ == '__main__':
    # Command line: <filename> <size> <divisions>
    if len(sys.argv) != 4:
        # print() with a single argument behaves the same on Python 2 and 3,
        # whereas the print statement is a syntax error on Python 3.
        print("Usage: <filename> <size> <divisions>")
        sys.exit(1)

    filename = sys.argv[1]
    size = int(sys.argv[2])
    divisions = int(sys.argv[3])

    createFastqFiles(filename, size, divisions)
83 | 


--------------------------------------------------------------------------------
/benchmarks/fqToFaConvert.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright (c) 2016, The Regents of the University of California.
 3 | import sys
 4 | import os
 5 | 
 6 | class fastaModel(object):
 7 |     """
 8 |     Contains methods for writing data to a file in the fasta format
 9 |     """
10 |     def __init__(self, fileHandle):
11 |         self.fileHandle = fileHandle
12 |         self.currSeq = ""
13 | 
14 |     def writeName(self, name):
15 |         """
16 |         Writes the given name to the fileHandle in the fasta format
17 |         """
18 |         self.fileHandle.write(">%s " % name.strip())
19 | 
20 |     def writeDescription(self, description):
21 |         """
22 |         Writes the given description and the stored sequence to the file
23 |         """
24 |         self.fileHandle.write("%s\n%s\n" % (description.strip(), self.currSeq))
25 | 
26 |     def writeSequence(self, sequence):
27 |         """
28 |         Stores the given sequence until a call to writeDescription is made
29 |         so that the description and sequence will be stored in the correct
30 |         fasta order
31 |         """
32 |         self.currSeq = sequence.strip()
33 | 
def convertFastqToFasta(inputFilename, outputFilename):
    """
    Converts the given fastq file (inputFilename) to an equivalent fasta file
    (outputFilename). The fastq's quality information is converted to a fasta's
    'description' field. Sequence and name fields are left alone
    """

    # Text mode so that lines are str on Python 3 as well (the startswith
    # checks below compare against str). The with-blocks close both files
    # even if the conversion fails part-way.
    with open(inputFilename, "r") as inputFile:
        with open(outputFilename, "w") as outputFile:
            model = fastaModel(outputFile)

            for line in inputFile:
                if line.startswith("@"): # Line is a name
                    model.writeName(line[1:])
                elif line.startswith('+'): # Next line is the quality
                    # The quality line is consumed here, so it is never
                    # mistaken for a name even if it starts with '@'.
                    # The default avoids StopIteration on a truncated file.
                    quality = next(inputFile, "")
                    model.writeDescription(quality)
                else: # Line is the sequence
                    model.writeSequence(line)
56 | 
if __name__ == '__main__':
    # Command line: <input filename> <output filename>
    if len(sys.argv) != 3:
        # print() with a single argument behaves the same on Python 2 and 3,
        # whereas the print statement is a syntax error on Python 3.
        print("Usage: <input filename> <output filename>")
        sys.exit(1)

    inputFilename = sys.argv[1]
    outputFilename = sys.argv[2]

    if not os.path.isfile(inputFilename):
        print("Error: %s doesn't exist" % inputFilename)
        sys.exit(2)

    convertFastqToFasta(inputFilename, outputFilename)
70 | 


--------------------------------------------------------------------------------
/benchmarks/mysql/create.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import MySQLdb
 3 | import mdbConstants
 4 | 
 5 | def create_db(fields, rcrditer):
 6 |     """
 7 |     Populates the mysql database with records from the record iter
 8 |     """
 9 |     conn = MySQLdb.connect(db=mdbConstants._DBNAME, user=mdbConstants._USER)
10 | 
11 |     cur = conn.cursor()
12 | 
13 |     # Create the admin table
14 |     cur.execute('CREATE TABLE %s (ID int NOT NULL auto_increment, '\
15 |                'FIELDNAME TEXT, PRIMARY KEY(ID))' % mdbConstants._SCREEDADMIN)
16 | 
17 |     for attribute in fields:
18 |         cur.execute("INSERT INTO %s (FIELDNAME) VALUES ('%s')" % \
19 |             (mdbConstants._SCREEDADMIN, attribute))
20 | 
21 |     # Setup the dictionary table creation field substring
22 |     otherFields = fields[1:]
23 |     createsub = ['%s TEXT' % field for field in otherFields]
24 |     createsub.insert(0, '%s VARCHAR(100)' % fields[0])
25 |     createsub = ','.join(createsub)
26 | 
27 |     # Create the dictionary table
28 |     cur.execute('CREATE TABLE %s (%s int NOT NULL auto_increment, %s, PRIMARY KEY(%s))' %
29 |                 (mdbConstants._DICT_TABLE, mdbConstants._PRIMARY_KEY,
30 |                  createsub,
31 |                  mdbConstants._PRIMARY_KEY))
32 | 
33 |     # Attribute to index
34 |     queryby = fields[0]
35 | 
36 |     # Make the index on the 'queryby' attribute
37 |     cur.execute('CREATE UNIQUE INDEX %sidx ON %s(%s)' %
38 |                 (queryby, mdbConstants._DICT_TABLE, queryby))
39 | 
40 |     # Setup the 'perc' pgres substring
41 |     perc = ', '.join(['%s' for i in range(len(fields))])
42 | 
43 |     # Setup the sql substring for inserting data into db
44 |     fieldsub = ','.join(fields)
45 | 
46 |     # Pull data from rcrditer and store in database
47 |     for record in rcrditer:
48 |         data = tuple([record[key] for key in fields])
49 |         cur.execute('INSERT INTO %s (%s) VALUES (%s)' %\
50 |                     (mdbConstants._DICT_TABLE, fieldsub, perc),
51 |                     data)
52 | 
53 |     conn.commit()
54 |     cur.close()
55 |     conn.close()
56 | 
def droptables():
    """
    Drops tables in db

    Dropping is best-effort: a database error on either table (typically
    because the table does not exist yet) is ignored.
    """
    conn = MySQLdb.connect(db=mdbConstants._DBNAME, user=mdbConstants._USER)
    try:
        cur = conn.cursor()
        try:
            for table in (mdbConstants._DICT_TABLE, mdbConstants._SCREEDADMIN):
                try:
                    cur.execute('DROP TABLE %s;' % table)
                except MySQLdb.Error:
                    # Only database errors are swallowed; a bare except
                    # would also hide KeyboardInterrupt and plain bugs.
                    pass

            conn.commit()
        finally:
            cur.close()
    finally:
        # Release the connection even if something unexpected was raised
        conn.close()
77 | 


--------------------------------------------------------------------------------
/benchmarks/mysql/mdbConstants.py:
--------------------------------------------------------------------------------
# Shared constants for the mysql benchmark scripts.

# Name of the admin table
_SCREEDADMIN = 'SCREEDADMIN'
# Name of the dictionary table
_DICT_TABLE = 'DICTIONARY_TABLE'
# Name of the primary key column
_PRIMARY_KEY = 'id'
# Database name
_DBNAME = 'sdb'
# Database user name
# NOTE(review): hard-coded; presumably has to be adjusted per machine - confirm
_USER = 'alex'
6 | 


--------------------------------------------------------------------------------
/benchmarks/mysql/mydb.py:
--------------------------------------------------------------------------------
 1 | import mdbConstants
 2 | import MySQLdb
 3 | import UserDict
 4 | import types
 5 | 
 6 | class _mdb_record_dict(UserDict.DictMixin):
 7 |     """
 8 |     Simple dict-like record interface with bag behavior.
 9 |     """
10 |     def __init__(self, *args, **kwargs):
11 |         self.d = dict(*args, **kwargs)
12 |         
13 |     def __getitem__(self, name):
14 |         return self.d[name]
15 | 
16 |     def __setitem__(self, name, value):
17 |         self.d[name] = value
18 |     
19 |     def __getattr__(self, name):
20 |         try:
21 |             return self.d[name]
22 |         except KeyError:
23 |             raise AttributeError, name
24 | 
25 |     def keys(self):
26 |         return self.d.keys()
27 | 
class mydb(object):
    """
    Read-only, dict-like view of the mysql benchmark database.

    Field names are read from the admin table; records are looked up by
    the field with the lowest admin id.
    """
    def __init__(self):
        self._conn = MySQLdb.connect(db=mdbConstants._DBNAME, user=mdbConstants._USER)

        cur = self._conn.cursor()
        cur.execute('SELECT id, fieldname FROM %s' % mdbConstants._SCREEDADMIN)
        # admin id -> field name
        self._adm = dict(cur.fetchall())
        # sorted() works on Python 2 lists and Python 3 key views alike
        keys = sorted(self._adm)

        # Order the fields by admin id explicitly instead of relying on the
        # iteration order of the dict; the primary key column goes first.
        self._fields = [self._adm[k] for k in keys]
        self._fields.insert(0, mdbConstants._PRIMARY_KEY.lower())
        self._fieldStr = ",".join(self._fields)

        self._queryBy = self._adm[keys[0]]

    def close(self):
        """
        Closes the database handles
        """
        self._conn.close()

    def loadRecordByIndex(self, idx):
        """
        Loads a record from the database by index

        NOTE: not implemented; this is currently a no-op that returns None.
        """

    def loadRecordByName(self, key):
        """
        Loads a record from the database by the value of its lookup field.

        Raises KeyError if no record matches.
        """
        cursor = self._conn.cursor()
        # Column and table names cannot be bound, but the key is a value:
        # it is handed to the driver as a parameter so that quotes in the
        # key can neither break nor alter the statement.
        query = "SELECT %s FROM %s WHERE %s=%%s" % (self._queryBy,
                                                    mdbConstants._DICT_TABLE,
                                                    self._queryBy)
        cursor.execute(query, (key,))
        if cursor.fetchone() is None:
            raise KeyError("Key %s not found" % key)

        # The existence check and the fetch stay two separate queries so
        # that the workload measured by the benchmark is unchanged.
        query = "SELECT %s FROM %s WHERE %s=%%s" % (self._fieldStr,
                                                    mdbConstants._DICT_TABLE,
                                                    self._queryBy)
        cursor.execute(query, (key,))
        return _mdb_record_dict(zip(self._fields, cursor.fetchone()))

    def keys(self):
        """
        Returns a list of keys in database
        """
        cursor = self._conn.cursor()
        query = "SELECT %s FROM %s" % (self._queryBy,
                                       mdbConstants._DICT_TABLE)
        cursor.execute(query)
        # Each row is a 1-tuple; unpack it to the bare key
        return [elem for elem, in cursor]
84 | 


--------------------------------------------------------------------------------
/benchmarks/mysql/mysqlCreateTimeit.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import timeit
 5 | 
 6 | if __name__ == '__main__':
 7 |     if len(sys.argv) != 3:
 8 |         print "Usage: %s <filename> <fa/fq>" % sys.argv[0]
 9 |         exit(1)
10 | 
11 |     filename = sys.argv[1]
12 |     fafq = sys.argv[2]
13 | 
14 |     fqrunStatement = """
15 | create.create_db(FASTQFIELDTYPES, iterfunc)
16 | theFile.close()
17 | """
18 | 
19 |     fqsetupStatement = """
20 | import os, sys
21 | import create
22 | thisdir = sys.path[0]
23 | libdir = os.path.abspath(os.path.join(thisdir, '..', '..', 'screed'))
24 | sys.path.insert(0, libdir)
25 | from fastq import fqiter
26 | create.droptables()
27 | FASTQFIELDTYPES = ('name', 'annotations', 'sequence', 'quality')
28 | theFile = open('%s', 'rb')
29 | iterfunc = fqiter(theFile)
30 | """ % filename
31 | 
32 |     farunStatement = """
33 | create.create_db(FASTAFIELDTYPES, iterfunc)
34 | theFile.close()
35 | """
36 | 
37 |     fasetupStatement = """
38 | import os, sys
39 | import create
40 | thisdir = sys.path[0]
41 | libdir = os.path.abspath(os.path.join(thisdir, '..', '..', 'screed'))
42 | sys.path.insert(0, libdir)
43 | from fasta import faiter
44 | create.droptables()
45 | FASTAFIELDTYPES = ('name', 'description', 'sequence')
46 | theFile = open('%s', 'rb')
47 | iterfunc = faiter(theFile)
48 | """ % filename
49 | 
50 |     t = None
51 |     if fafq == 'fasta':
52 |         t = timeit.Timer(farunStatement, fasetupStatement)
53 |     elif fafq == 'fastq':
54 |         t = timeit.Timer(fqrunStatement, fqsetupStatement)
55 |     else:
56 |         raise ValueError("Invalid db type specified: %s" % fafq)
57 | 
58 |     print "[MYSQL CREATE]%s:" % filename
59 |     print t.repeat(2, 1)
60 | 


--------------------------------------------------------------------------------
/benchmarks/mysql/mysqlTimeit.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import timeit
 4 | import sys
 5 | 
 6 | if __name__ == '__main__':
 7 |     runStatement = """
 8 | for i in xrange(0, 100000):
 9 |     entry = db.loadRecordByName(random.choice(keys))
10 | """
11 | 
12 |     setupStatement = """
13 | import os, sys
14 | import random
15 | import mydb
16 | db = mydb.mydb()
17 | keys = db.keys()
18 | """
19 | 
20 |     t = timeit.Timer(runStatement, setupStatement)
21 | 
22 |     print "[MYSQL TIMEIT]"
23 |     print t.repeat(2, 1)
24 | 


--------------------------------------------------------------------------------
/benchmarks/mysql/mysql_login.txt:
--------------------------------------------------------------------------------
1 | dbname: sdb
2 | user: alex
3 | 


--------------------------------------------------------------------------------
/benchmarks/pgres/create.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import psycopg2
 3 | import pdbConstants
 4 | 
 5 | def create_db(fields, rcrditer):
 6 |     """
 7 |     Populates the pgres database with records from the record iter
 8 |     """
 9 |     
10 |     conn = psycopg2.connect('dbname=%s user=%s' % (pdbConstants._DBNAME,
11 |                                                    pdbConstants._USER))
12 |     cur = conn.cursor()
13 | 
14 |     # Create the admin table
15 |     cur.execute('CREATE TABLE %s (ID serial PRIMARY KEY, '\
16 |                'FIELDNAME TEXT)' % pdbConstants._SCREEDADMIN)
17 | 
18 |     for attribute in fields:
19 |         cur.execute("INSERT INTO %s (FIELDNAME) VALUES ('%s')" % \
20 |             (pdbConstants._SCREEDADMIN, attribute))
21 | 
22 |     # Setup the dictionary table creation field substring
23 |     createsub = ','.join(['%s TEXT' % field for field in fields])
24 | 
25 |     # Create the dictionary table
26 |     cur.execute('CREATE TABLE %s (%s serial PRIMARY KEY, %s)' %
27 |                 (pdbConstants._DICT_TABLE, pdbConstants._PRIMARY_KEY,
28 |                  createsub))
29 | 
30 |     # Attribute to index
31 |     queryby = fields[0]
32 | 
33 |     # Make the index on the 'queryby' attribute
34 |     cur.execute('CREATE UNIQUE INDEX %sidx ON %s(%s)' %
35 |                 (queryby, pdbConstants._DICT_TABLE, queryby))
36 | 
37 |     # Setup the 'perc' pgres substring
38 |     perc = ', '.join(['%s' for i in range(len(fields))])
39 | 
40 |     # Setup the sql substring for inserting data into db
41 |     fieldsub = ','.join(fields)
42 | 
43 |     # Pull data from rcrditer and store in database
44 |     for record in rcrditer:
45 |         data = tuple([record[key] for key in fields])
46 |         cur.execute('INSERT INTO %s (%s) VALUES (%s)' %\
47 |                     (pdbConstants._DICT_TABLE, fieldsub, perc),
48 |                     data)
49 | 
50 |     conn.commit()
51 |     cur.close()
52 |     conn.close()
53 | 
54 | def droptables():
55 |     """
56 |     Drops tables in db 
57 |     """
58 |     conn = psycopg2.connect('dbname=%s user=%s' % (pdbConstants._DBNAME,
59 |                                                    pdbConstants._USER))
60 |     cur = conn.cursor()
61 | 
62 |     try:
63 |         cur.execute('DROP TABLE %s;' % pdbConstants._DICT_TABLE)
64 |     except:
65 |         pass
66 |     try:
67 |         cur.execute('DROP TABLE %s;' % pdbConstants._SCREEDADMIN)
68 |     except:
69 |         pass
70 | 
71 |     conn.commit()
72 |     cur.close()
73 |     conn.close()
74 | 


--------------------------------------------------------------------------------
/benchmarks/pgres/drop.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | from create import droptables
4 | # Script entry point: drop the benchmark tables via create.droptables()
5 | if __name__ == '__main__':
6 |     droptables()
7 | 


--------------------------------------------------------------------------------
/benchmarks/pgres/pdbConstants.py:
--------------------------------------------------------------------------------
1 | _SCREEDADMIN = 'SCREEDADMIN'  # admin table: one row per record field name
2 | _DICT_TABLE = 'DICTIONARY_TABLE'  # table holding the records themselves
3 | _PRIMARY_KEY = 'id'  # serial primary-key column of _DICT_TABLE
4 | _DBNAME = 'sdb'  # database name used in the connection string
5 | _USER = 'alex'  # database user used in the connection string
6 | 


--------------------------------------------------------------------------------
/benchmarks/pgres/pgdb.py:
--------------------------------------------------------------------------------
 1 | import pdbConstants
 2 | import psycopg2
 3 | import UserDict
 4 | import types
 5 | 
 6 | class _pdb_record_dict(UserDict.DictMixin):
 7 |     """
 8 |     Simple dict-like record interface with bag behavior.
 9 |     """
10 |     def __init__(self, *args, **kwargs):
11 |         self.d = dict(*args, **kwargs)
12 |         
13 |     def __getitem__(self, name):
14 |         return self.d[name]
15 | 
16 |     def __setitem__(self, name, value):
17 |         self.d[name] = value
18 |     
19 |     def __getattr__(self, name):
20 |         try:
21 |             return self.d[name]
22 |         except KeyError:
23 |             raise AttributeError, name
24 | 
25 |     def keys(self):
26 |         return self.d.keys()
27 | 
28 | class pgdb(object):
29 |     def __init__(self):
30 |         self._conn = psycopg2.connect('dbname=%s user=%s' %
31 |                                       (pdbConstants._DBNAME,
32 |                                        pdbConstants._USER))
33 |         cur = self._conn.cursor()
34 |         cur.execute('SELECT id, fieldname FROM %s' % pdbConstants._SCREEDADMIN)
35 |         self._adm = dict(cur.fetchall())
36 |         keys = self._adm.keys()
37 |         keys.sort()
38 | 
39 |         self._fields = self._adm.values()
40 |         self._fields.insert(0, pdbConstants._PRIMARY_KEY.lower())
41 |         self._fieldStr = ",".join(self._fields)
42 | 
43 |         self._queryBy = self._adm[keys[0]]
44 | 
45 |     def close(self):
46 |         """
47 |         Closes the database handles
48 |         """
49 |         self._conn.close()
50 | 
51 |     def loadRecordByIndex(self, idx):
52 |         """
53 |         Loads a record from the database by index
54 |         """
55 |     
56 |     def loadRecordByName(self, key):
57 |         """
58 |         As above, by name
59 |         """
60 |         cursor = self._conn.cursor()
61 |         query = "SELECT %s FROM %s WHERE %s='%s'" % (self._queryBy,
62 |                                                      pdbConstants._DICT_TABLE,
63 |                                                      self._queryBy,
64 |                                                      key)
65 |         cursor.execute(query)
66 |         if type(cursor.fetchone()) == types.NoneType:
67 |             raise KeyError("Key %s not found" % key)
68 | 
69 |         query = "SELECT %s FROM %s WHERE %s='%s'" % (self._fieldStr,
70 |                                                      pdbConstants._DICT_TABLE,
71 |                                                      self._queryBy,
72 |                                                      key)
73 |         cursor.execute(query)
74 |         return _pdb_record_dict(zip(self._fields, cursor.fetchone()))
75 | 
76 |     def keys(self):
77 |         """
78 |         Returns a list of keys in database
79 |         """
80 |         cursor = self._conn.cursor()
81 |         query = "SELECT %s FROM %s" % (self._queryBy,
82 |                                        pdbConstants._DICT_TABLE)
83 |         cursor.execute(query)
84 |         return [elem for elem, in cursor]
85 | 


--------------------------------------------------------------------------------
/benchmarks/pgres/pgresCreateTimeit.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import timeit
 5 | 
 6 | if __name__ == '__main__':
 7 |     if len(sys.argv) != 3:
 8 |         print "Usage: %s <filename> <fa/fq>" % sys.argv[0]
 9 |         exit(1)
10 | 
11 |     filename = sys.argv[1]
12 |     fafq = sys.argv[2]
13 | 
14 |     fqrunStatement = """
15 | create.create_db(FASTQFIELDTYPES, iterfunc)
16 | theFile.close()
17 | """
18 | 
19 |     fqsetupStatement = """
20 | import os, sys
21 | import create
22 | thisdir = sys.path[0]
23 | libdir = os.path.abspath(os.path.join(thisdir, '..', '..', 'screed'))
24 | sys.path.insert(0, libdir)
25 | from fastq import fqiter
26 | create.droptables()
27 | FASTQFIELDTYPES = ('name', 'annotations', 'sequence', 'quality')
28 | theFile = open('%s', 'rb')
29 | iterfunc = fqiter(theFile)
30 | """ % filename
31 | 
32 |     farunStatement = """
33 | create.create_db(FASTAFIELDTYPES, iterfunc)
34 | theFile.close()
35 | """
36 | 
37 |     fasetupStatement = """
38 | import os, sys
39 | import create
40 | thisdir = sys.path[0]
41 | libdir = os.path.abspath(os.path.join(thisdir, '..', '..', 'screed'))
42 | sys.path.insert(0, libdir)
43 | from fasta import faiter
44 | create.droptables()
45 | FASTAFIELDTYPES = ('name', 'description', 'sequence')
46 | theFile = open('%s', 'rb')
47 | iterfunc = faiter(theFile)
48 | """ % filename
49 | 
50 |     t = None
51 |     if fafq == 'fasta':
52 |         t = timeit.Timer(farunStatement, fasetupStatement)
53 |     elif fafq == 'fastq':
54 |         t = timeit.Timer(fqrunStatement, fqsetupStatement)
55 |     else:
56 |         raise ValueError("Invalid db type specified: %s" % fafq)
57 | 
58 |     print "[PGRES CREATE]%s:" % filename
59 |     print t.repeat(2, 1)
60 | 


--------------------------------------------------------------------------------
/benchmarks/pgres/pgresTimeit.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import timeit
 4 | import sys
 5 | 
 6 | if __name__ == '__main__':
 7 |     runStatement = """
 8 | for i in xrange(0, 100000):
 9 |     entry = db.loadRecordByName(random.choice(keys))
10 | """
11 | 
12 |     setupStatement = """
13 | import os, sys
14 | import random
15 | import pgdb
16 | db = pgdb.pgdb()
17 | keys = db.keys()
18 | """
19 | 
20 |     t = timeit.Timer(runStatement, setupStatement)
21 | 
22 |     print "[PGRES RUN]"
23 |     print t.repeat(2, 1)
24 | 


--------------------------------------------------------------------------------
/benchmarks/pgres/pgres_login.txt:
--------------------------------------------------------------------------------
1 | dbname: sdb
2 | user: postgres
3 | pass: blah
4 | 
5 | user: alex
6 | 


--------------------------------------------------------------------------------
/benchmarks/screedCreateTimeit.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright (c) 2016, The Regents of the University of California.
 3 | 
 4 | import sys
 5 | import timeit
 6 | 
 7 | if __name__ == '__main__':
 8 |     if len(sys.argv) != 3:
 9 |         print "Usage: %s <filename> <fa/fq>" % sys.argv[0]
10 |         exit(1)
11 | 
12 |     filename = sys.argv[1]
13 |     fafq = sys.argv[2]
14 | 
15 |     fqrunStatement = """
16 | createscreed.create_db(filename, fastq.FieldTypes, iterfunc)
17 | theFile.close()
18 | """
19 | 
20 |     fqsetupStatement = """
21 | import os, sys
22 | thisdir = sys.path[0]
23 | libdir = os.path.abspath(os.path.join(thisdir, '..', 'screed'))
24 | sys.path.insert(0, libdir)
25 | import createscreed
26 | import fastq
27 | FASTQFIELDTYPES = ('name', 'annotations', 'sequence', 'quality')
28 | filename = '%s'
29 | theFile = open(filename, 'rb')
30 | iterfunc = fastq.fastq_iter(theFile)
31 | """ % filename
32 | 
33 |     farunStatement = """
34 | createscreed.create_db(filename, fasta.FieldTypes, iterfunc)
35 | theFile.close()
36 | """
37 | 
38 |     fasetupStatement = """
39 | import os, sys
40 | thisdir = sys.path[0]
41 | libdir = os.path.abspath(os.path.join(thisdir, '..', 'screed'))
42 | sys.path.insert(0, libdir)
43 | import createscreed
44 | import fasta
45 | FASTAFIELDTYPES = ('name', 'description', 'sequence')
46 | filename = '%s'
47 | theFile = open(filename, 'rb')
48 | iterfunc = fasta.fasta_iter(theFile)
49 | """ % filename
50 | 
51 |     t = None
52 |     if fafq == 'fasta':
53 |         t = timeit.Timer(farunStatement, fasetupStatement)
54 |     elif fafq == 'fastq':
55 |         t = timeit.Timer(fqrunStatement, fqsetupStatement)
56 |     else:
57 |         raise ValueError("Invalid db type specified: %s" % fafq)
58 | 
59 |     print "[SCREED CREATE]%s:" % filename
60 |     print t.repeat(2, 1)
61 | 


--------------------------------------------------------------------------------
/benchmarks/screedTimeit.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright (c) 2016, The Regents of the University of California.
 3 | 
 4 | import timeit
 5 | import sys
 6 | import os
 7 | 
 8 | if __name__ == '__main__':
 9 |     if len(sys.argv) != 2:
10 |         print "Usage: %s <filename>" % sys.argv[0]
11 |         exit(1)
12 | 
13 |     screedFile = sys.argv[1]
14 |     if not os.path.isfile(screedFile):
15 |         print "No such file: %s" % screedFile
16 |         exit(1)
17 | 
18 |     runStatement = """
19 | for i in xrange(0, 100000):
20 |     entry = str(db[random.choice(keys)].sequence)
21 | """
22 | 
23 |     setupStatement = """
24 | import os, sys
25 | import random
26 | thisdir = sys.path[0]
27 | libdir = os.path.abspath(os.path.join(thisdir, '..'))
28 | sys.path.insert(0, libdir)
29 | import screed
30 | db = screed.openscreed.ScreedDB('%s')
31 | keys = db.keys()
32 | """ % screedFile
33 | 
34 |     t = timeit.Timer(runStatement, setupStatement)
35 | 
36 |     print "[SCREED RUN]%s:" % screedFile
37 |     print t.repeat(2, 1)
38 | 


--------------------------------------------------------------------------------
/benchmarks/screedTimeit1M.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright (c) 2016, The Regents of the University of California.
 3 | 
 4 | import timeit
 5 | import sys
 6 | import os
 7 | 
 8 | if __name__ == '__main__':
 9 |     if len(sys.argv) != 2:
10 |         print "Usage: %s <filename>" % sys.argv[0]
11 |         exit(1)
12 | 
13 |     screedFile = sys.argv[1]
14 |     if not os.path.isfile(screedFile):
15 |         print "No such file: %s" % screedFile
16 |         exit(1)
17 | 
18 |     runStatement = """
19 | for i in xrange(0, 100000):
20 |     entry = str(db[random.choice(keys)].sequence)
21 | """
22 | 
23 |     setupStatement = """
24 | import os, sys
25 | import random
26 | thisdir = sys.path[0]
27 | libdir = os.path.abspath(os.path.join(thisdir, '..'))
28 | sys.path.insert(0, libdir)
29 | import screed
30 | db = screed.openscreed.ScreedDB('%s')
31 | keys = []
32 | for i, k in enumerate(db.iterkeys()):
33 |     if i > 1000000:
34 |         break
35 |     keys.append(k)
36 | """ % screedFile
37 | 
38 |     t = timeit.Timer(runStatement, setupStatement)
39 | 
40 |     print "[SCREED RUN]%s:" % screedFile
41 |     print t.repeat(2, 1)
42 | 


--------------------------------------------------------------------------------
/doc/CODE_OF_CONDUCT.rst:
--------------------------------------------------------------------------------
1 | ../CODE_OF_CONDUCT.rst


--------------------------------------------------------------------------------
/doc/LICENSE.rst:
--------------------------------------------------------------------------------
 1 | =======
 2 | License
 3 | =======
 4 | 
 5 | Copyright (c) 2008, Michigan State University.
 6 | Copyright (c) 2015, The Regents of the University of California.
 7 | All rights reserved.
 8 | 
 9 | Redistribution and use in source and binary forms, with or without modification,
10 | are permitted provided that the following conditions are met:
11 | 
12 |     * Redistributions of source code must retain the above copyright notice,
13 |       this list of conditions and the following disclaimer.
14 |     * Redistributions in binary form must reproduce the above copyright notice,
15 |       this list of conditions and the following disclaimer in the documentation
16 |       and/or other materials provided with the distribution.
17 |     * Neither the name of the author nor the names of its contributors may be
18 |       used to endorse or promote products derived from this software without
19 |       specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # Internal variables.
 11 | PAPEROPT_a4     = -D latex_paper_size=a4
 12 | PAPEROPT_letter = -D latex_paper_size=letter
 13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 14 | # the i18n builder cannot share the environment and doctrees with the others
 15 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 16 | # Every target in this file is a command, not a file it produces.
 17 | .PHONY: all help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info gettext changes linkcheck doctest
 18 | 
 19 | all: html
 20 | 
 21 | help:
 22 | 	@echo "Please use \`make <target>' where <target> is one of"
 23 | 	@echo "  html       to make standalone HTML files"
 24 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 25 | 	@echo "  singlehtml to make a single large HTML file"
 26 | 	@echo "  pickle     to make pickle files"
 27 | 	@echo "  json       to make JSON files"
 28 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 29 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 30 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 31 | 	@echo "  epub       to make an epub"
 32 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 33 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 34 | 	@echo "  text       to make text files"
 35 | 	@echo "  man        to make manual pages"
 36 | 	@echo "  texinfo    to make Texinfo files"
 37 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 38 | 	@echo "  gettext    to make PO message catalogs"
 39 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 40 | 	@echo "  linkcheck  to check all external links for integrity"
 41 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 42 | 
 43 | clean:
 44 | 	-rm -rf $(BUILDDIR)/*
 45 | 
 46 | html:
 47 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 48 | 	@echo
 49 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 50 | 
 51 | dirhtml:
 52 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 53 | 	@echo
 54 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 55 | 
 56 | singlehtml:
 57 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 58 | 	@echo
 59 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 60 | 
 61 | pickle:
 62 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 63 | 	@echo
 64 | 	@echo "Build finished; now you can process the pickle files."
 65 | 
 66 | json:
 67 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 68 | 	@echo
 69 | 	@echo "Build finished; now you can process the JSON files."
 70 | 
 71 | htmlhelp:
 72 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 73 | 	@echo
 74 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 75 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 76 | # Inner quotes are escaped so the shell prints them instead of eating them.
 77 | qthelp:
 78 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 79 | 	@echo
 80 | 	@echo "Build finished; now you can run \"qcollectiongenerator\" with the" \
 81 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 82 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/screed.qhcp"
 83 | 	@echo "To view the help file:"
 84 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/screed.qhc"
 85 | 
 86 | devhelp:
 87 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 88 | 	@echo
 89 | 	@echo "Build finished."
 90 | 	@echo "To view the help file:"
 91 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/screed"
 92 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/screed"
 93 | 	@echo "# devhelp"
 94 | 
 95 | epub:
 96 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
 97 | 	@echo
 98 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 99 | 
100 | latex:
101 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
102 | 	@echo
103 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
104 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
105 | 	      "(use \`make latexpdf' here to do that automatically)."
106 | 
107 | latexpdf:
108 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
109 | 	@echo "Running LaTeX files through pdflatex..."
110 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
111 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
112 | 
113 | text:
114 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
115 | 	@echo
116 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
117 | 
118 | man:
119 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
120 | 	@echo
121 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
122 | 
123 | texinfo:
124 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
125 | 	@echo
126 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
127 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
128 | 	      "(use \`make info' here to do that automatically)."
129 | # $(MAKE) (not a literal make) so the sub-make inherits flags and jobserver.
130 | info:
131 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
132 | 	@echo "Running Texinfo files through makeinfo..."
133 | 	$(MAKE) -C $(BUILDDIR)/texinfo info
134 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
135 | 
136 | gettext:
137 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
138 | 	@echo
139 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
140 | 
141 | changes:
142 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
143 | 	@echo
144 | 	@echo "The overview file is in $(BUILDDIR)/changes."
145 | 
146 | linkcheck:
147 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
148 | 	@echo
149 | 	@echo "Link check complete; look for any errors in the above output " \
150 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
151 | 
152 | doctest:
153 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
154 | 	@echo "Testing of doctests in the sources finished, look at the " \
155 | 	      "results in $(BUILDDIR)/doctest/output.txt."
156 | 


--------------------------------------------------------------------------------
/doc/_static/labibi.css:
--------------------------------------------------------------------------------
 1 | @import url('default.css');
 2 | 
 3 | /* Styles for floating Edit on GitHub box */
 4 | #editor-trap {
 5 |     margin: 1em;
 6 |     padding: 1em;
 7 |     border: 1px solid black;
 8 |     width: 200px;
 9 | 
10 |     background: #fefabc;
11 |     position: fixed;
12 |     bottom: 1em;
13 |     left: 1em;
14 |     font-size: 60%;
15 |     text-align: left;
16 |     z-index: 2;
17 | 
18 |     /* Vendor-prefixed forms first so the standard property wins */
19 |     -moz-transform: rotate(-4deg);
20 |     -webkit-transform: rotate(-4deg);
21 |     -o-transform: rotate(-4deg);
22 |     -ms-transform: rotate(-4deg);
23 |     transform: rotate(-4deg);
24 |     -moz-box-shadow: 0px 4px 6px #333;
25 |     -webkit-box-shadow: 0px 4px 6px #333;
26 |     box-shadow: 0px 4px 6px #333;
27 | 
28 |     /* The whole box acts as a click target */
29 |     cursor: pointer;
30 | }
31 | 
32 | #editor-trap h3 {
33 |     margin: 0 0 0.5em 0;
34 |     padding: 0;
35 |     background: transparent;
36 | }
37 | 
38 | #editor-trap ol {
39 |     margin: 0;
40 |     padding: 0 0 0 2em;
41 | }
42 | 
43 | /* Hide trick */
44 | 
45 | #editor-trap.toggled > * {
46 |     display: none;
47 | }
48 | 
49 | 
50 | #editor-trap.toggled > h3 {
51 |     display: block;
52 | }
53 | 
54 | 
55 | 


--------------------------------------------------------------------------------
/doc/_static/labibi.js:
--------------------------------------------------------------------------------
 1 | // Store editor pop-up help state in localStorage
 2 | // so it does not re-pop-up itself between page loads.
 3 | // Do not even pretend to support IE gracefully.
 4 | (function($) {
 5 | 
 6 |     $(document).ready(function() {
 7 |         var box = $("#editor-trap");
 8 |         var klass = "toggled";
 9 |         var storageKey = "toggled";
10 | 
11 |         function toggle() {
12 |             box.toggleClass(klass);
13 |             // Persist the new state in local storage as a string: "toggled" or "not-toggled"
14 |             window.localStorage.setItem(storageKey, box.hasClass(klass) ? "toggled" : "not-toggled");
15 |         }
16 | 
17 |         box.click(toggle);
18 | 
19 |         // Restore the persisted state of the editor pop-up; when nothing has been
20 |         // stored yet (!v) the box gets the class too. Values are strings because
21 |         // localStorage has no booleans: http://stackoverflow.com/questions/3263161/cannot-set-boolean-values-in-localstorage
22 |         var v = window.localStorage.getItem(storageKey);
23 |         if(v == "toggled" || !v) {
24 |           box.addClass(klass);
25 |         }
26 | 
27 |     });
28 | 
29 | })(jQuery);
30 | 


--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # screed documentation build configuration file, created by
  4 | # sphinx-quickstart on Wed Jun  6 16:32:37 2012.
  5 | #
  6 | # This file is execfile()d with the current directory set to its containing
  7 | # dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | import sys
 16 | import os
 17 | 
 18 | # If extensions (or modules to document with autodoc) are in another directory,
 19 | # add these directories to sys.path here. If the directory is relative to the
 20 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 21 | # sys.path.insert(0, os.path.abspath('.'))
 22 | 
 23 | # -- General configuration ----------------------------------------------------
 24 | 
 25 | # If your documentation needs a minimal Sphinx version, state it here.
 26 | # needs_sphinx = '1.0'
 27 | 
 28 | # Add any Sphinx extension module names here, as strings. They can be
 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 30 | extensions = []
 31 | 
 32 | # Add any paths that contain templates here, relative to this directory.
 33 | templates_path = ['_templates']
 34 | 
 35 | # The suffix of source filenames.
 36 | source_suffix = '.rst'
 37 | 
 38 | # The encoding of source files.
 39 | # source_encoding = 'utf-8-sig'
 40 | 
 41 | # The master toctree document.
 42 | master_doc = 'index'
 43 | 
 44 | # General information about the project.
 45 | project = u'screed'
 46 | copyright = u'2012-2015, Michigan State University'
 47 | 
 48 | # The version info for the project you're documenting, acts as replacement for
 49 | # |version| and |release|, also used in various other places throughout the
 50 | # built documents.
 51 | #
 52 | 
 53 | # The full version, including alpha/beta/rc tags.
 54 | 
 55 | sys.path.insert(0, '.')
 56 | 
 57 | import screed
 58 | release = screed.VERSION
 59 | 
 60 | # The short X.Y version.
 61 | version = '.'.join(release.split('.')[:2])
 62 | 
 63 | # The language for content autogenerated by Sphinx. Refer to documentation
 64 | # for a list of supported languages.
 65 | # language = None
 66 | 
 67 | # There are two options for replacing |today|: either, you set today to some
 68 | # non-false value, then it is used:
 69 | # today = ''
 70 | 
 71 | # Else, today_fmt is used as the format for a strftime call.
 72 | # today_fmt = '%B %d, %Y'
 73 | 
 74 | # List of patterns, relative to source directory, that match files and
 75 | # directories to ignore when looking for source files.
 76 | exclude_patterns = ['_build']
 77 | 
 78 | # The reST default role (used for this markup: `text`) to use for all
 79 | # documents.
 80 | # default_role = None
 81 | 
 82 | # If true, '()' will be appended to :func: etc. cross-reference text.
 83 | # add_function_parentheses = True
 84 | 
 85 | # If true, the current module name will be prepended to all description
 86 | # unit titles (such as .. function::).
 87 | # add_module_names = True
 88 | 
 89 | # If true, sectionauthor and moduleauthor directives will be shown in the
 90 | # output. They are ignored by default.
 91 | # show_authors = False
 92 | 
 93 | # The name of the Pygments (syntax highlighting) style to use.
 94 | pygments_style = 'sphinx'
 95 | 
 96 | # A list of ignored prefixes for module index sorting.
 97 | # modindex_common_prefix = []
 98 | 
 99 | 
100 | # -- Options for HTML output --------------------------------------------------
101 | 
102 | # The theme to use for HTML and HTML Help pages.  See the documentation for
103 | # a list of builtin themes.
104 | html_theme = 'default'
105 | 
106 | # Theme options are theme-specific and customize the look and feel of a theme
107 | # further.  For a list of options available for each theme, see the
108 | # documentation.
109 | # html_theme_options = {}
110 | 
111 | # Add any paths that contain custom themes here, relative to this directory.
112 | # html_theme_path = []
113 | 
114 | # The name for this set of Sphinx documents.  If None, it defaults to
115 | # "<project> v<release> documentation".
116 | # html_title = None
117 | 
118 | # A shorter title for the navigation bar.  Default is the same as html_title.
119 | # html_short_title = None
120 | 
121 | # The name of an image file (relative to this directory) to place at the top
122 | # of the sidebar.
123 | # html_logo = None
124 | 
125 | # The name of an image file (within the static path) to use as favicon of the
126 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
127 | # pixels large.
128 | # html_favicon = None
129 | 
130 | # Add any paths that contain custom static files (such as style sheets) here,
131 | # relative to this directory. They are copied after the builtin static files,
132 | # so a file named "default.css" will overwrite the builtin "default.css".
133 | html_static_path = ['_static']
134 | html_style = 'labibi.css'
135 | 
136 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
137 | # using the given strftime format.
138 | # html_last_updated_fmt = '%b %d, %Y'
139 | 
140 | # If true, SmartyPants will be used to convert quotes and dashes to
141 | # typographically correct entities.
142 | # html_use_smartypants = True
143 | 
144 | # Custom sidebar templates, maps document names to template names.
145 | # html_sidebars = {}
146 | 
147 | # Additional templates that should be rendered to pages, maps page names to
148 | # template names.
149 | # html_additional_pages = {}
150 | 
151 | # If false, no module index is generated.
152 | # html_domain_indices = True
153 | 
154 | # If false, no index is generated.
155 | # html_use_index = True
156 | 
157 | # If true, the index is split into individual pages for each letter.
158 | # html_split_index = False
159 | 
160 | # If true, links to the reST sources are added to the pages.
161 | # html_show_sourcelink = True
162 | 
163 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
164 | # html_show_sphinx = True
165 | 
166 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
167 | # html_show_copyright = True
168 | 
169 | # If true, an OpenSearch description file will be output, and all pages will
170 | # contain a <link> tag referring to it.  The value of this option must be the
171 | # base URL from which the finished HTML is served.
172 | # html_use_opensearch = ''
173 | 
174 | # This is the file name suffix for HTML files (e.g. ".xhtml").
175 | # html_file_suffix = None
176 | 
177 | # Output file base name for HTML help builder.
178 | htmlhelp_basename = 'screeddoc'
179 | 
180 | 
181 | html_context = {
182 |         "google_analytics_id": 'UA-51731094-1',
183 |         "disqus_shortname": 'screed-docs',
184 |         #   "github_base_account": 'dib-lab',
185 |         "github_project": 'screed',
186 | }
187 | 
188 | # -- Options for LaTeX output -------------------------------------------------
189 | 
190 | latex_elements = {
191 |     # The paper size ('letterpaper' or 'a4paper').
192 |     # 'papersize': 'letterpaper',
193 | 
194 |     # The font size ('10pt', '11pt' or '12pt').
195 |     # 'pointsize': '10pt',
196 | 
197 |     # Additional stuff for the LaTeX preamble.
198 |     # 'preamble': '',
199 | }
200 | 
201 | # Grouping the document tree into LaTeX files. List of tuples
202 | # (source start file, target name, title, author, documentclass
203 | # [howto/manual]).
204 | latex_documents = [
205 |   ('index', 'screed.tex', u'screed Documentation',
206 |    u'Alex Nolley and Titus Brown', 'manual'),
207 | ]
208 | 
209 | # The name of an image file (relative to this directory) to place at the top of
210 | # the title page.
211 | # latex_logo = None
212 | 
213 | # For "manual" documents, if this is true, then toplevel headings are parts,
214 | # not chapters.
215 | # latex_use_parts = False
216 | 
217 | # If true, show page references after internal links.
218 | # latex_show_pagerefs = False
219 | 
220 | # If true, show URL addresses after external links.
221 | # latex_show_urls = False
222 | 
223 | # Documents to append as an appendix to all manuals.
224 | # latex_appendices = []
225 | 
226 | # If false, no module index is generated.
227 | # latex_domain_indices = True
228 | 
229 | 
230 | # -- Options for manual page output -------------------------------------------
231 | 
232 | # One entry per manual page. List of tuples
233 | # (source start file, name, description, authors, manual section).
234 | man_pages = [
235 |     ('index', 'screed', u'screed Documentation',
236 |      [u'Alex Nolley and Titus Brown'], 1)
237 | ]
238 | 
239 | # If true, show URL addresses after external links.
240 | # man_show_urls = False
241 | 
242 | 
243 | # -- Options for Texinfo output -----------------------------------------------
244 | 
245 | # Grouping the document tree into Texinfo files. List of tuples
246 | # (source start file, target name, title, author,
247 | #  dir menu entry, description, category)
248 | texinfo_documents = [
249 |   ('index', 'screed', u'screed Documentation', u'Alex Nolley and Titus Brown',
250 |    'screed', 'One line description of project.', 'Miscellaneous'),
251 | ]
252 | 
253 | # Documents to append as an appendix to all manuals.
254 | # texinfo_appendices = []
255 | 
256 | # If false, no module index is generated.
257 | # texinfo_domain_indices = True
258 | 
259 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
260 | # texinfo_show_urls = 'footnote'
261 | 


--------------------------------------------------------------------------------
/doc/dev/coding-guidelines-and-review.rst:
--------------------------------------------------------------------------------
 1 | .. vim: set filetype=rst
 2 | 
 3 | Coding guidelines and code review checklist
 4 | ===========================================
 5 | 
 6 | This document is for anyone who wants to contribute code to the screed
 7 | project, and describes our coding standards and code review checklist.
 8 | 
 9 | ----
10 | 
11 | Coding standards
12 | ----------------
13 | 
14 | All plain-text files should have line widths of 80 characters or less unless
15 | that is not supported for the particular file format.
16 | 
17 | Vim users can set the indentation with::
18 | 
19 | 	set expandtab
20 | 	set shiftwidth=4
21 | 	set softtabstop=4
22 | 
23 | We are a pure Python project and `PEP 8 <http://www.python.org/dev/peps/pep-0008/>`__ is our
24 | standard. The ``pep8`` and ``autopep8`` Makefile targets are helpful.
25 | 
26 | Code and documentation must have their spelling checked. Vim users can
27 | run::
28 | 
29 |         :setlocal spell spelllang=en_us
30 | 
31 | Use ``]s`` and ``[s`` to navigate between misspellings and ``z=`` to suggest
32 | a correctly spelled word. ``zg`` will add a word as a good word.
33 | 
34 | GNU ``aspell`` can also be used to check the spelling in a single file::
35 | 
36 |         aspell check --mode $filename
37 | 
38 | Code Review
39 | -----------
40 | 
41 | Please read `11 Best Practices for Peer Code Review
42 | <http://smartbear.com/SmartBear/media/pdfs/WP-CC-11-Best-Practices-of-Peer-Code-Review.pdf>`__.
43 | 
44 | See also `Code reviews: the lab meeting for code
45 | <http://fperez.org/py4science/code_reviews.html>`__ and
46 | `the PyCogent coding guidelines
47 | <http://pycogent.org/coding_guidelines.html>`__.
48 | 
49 | Checklist
50 | ---------
51 | 
52 | Copy and paste the following into a pull request comment when it is
53 | ready for review::
54 |    
55 |    - [ ] Is it mergeable?
56 |    - [ ] `make test` Did it pass the tests?
57 |    - [ ] `make clean diff-cover` If it introduces new functionality, is it tested?
58 |    - [ ] `make format diff_pylint_report doc` Is it well formatted?
59 |    - [ ] Is it documented in the `ChangeLog`?
60 |      http://en.wikipedia.org/wiki/Changelog#Format
61 |    - [ ] Was a spellchecker run on the source code and documentation after
62 |      changes were made?
63 | 
64 | **Note** that after you submit the comment you can check and uncheck
65 | the individual boxes on the formatted comment; no need to put x or y
66 | in the middle.
67 | 


--------------------------------------------------------------------------------
/doc/dev/index.rst:
--------------------------------------------------------------------------------
 1 | The screed developer documentation
 2 | ==================================
 3 | 
 4 | This section of the documentation is for people who are contributing
 5 | (or would like to contribute to) the screed project codebase, either by
 6 | contributing code or by helping improve the documentation.
 7 | 
 8 | Please note that this project is released with a :doc:`../CODE_OF_CONDUCT`.
 9 | By participating in the development of this project you agree to abide by its
10 | terms.
11 | 
12 | Contents:
13 | 
14 | .. toctree::
15 |    :maxdepth: 1
16 | 
17 |    parsers
18 |    coding-guidelines-and-review
19 |    release-checklist
20 | 


--------------------------------------------------------------------------------
/doc/dev/parsers.rst:
--------------------------------------------------------------------------------
  1 | Writing Custom Sequence Parsers
  2 | ===============================
  3 | 
  4 | screed is built to be adaptable to new kinds of sequence file formats.
  5 | Included with screed are parsers for handling FASTA and FASTQ sequence
  6 | file types, though if you need screed to work with a new format, all
  7 | you need to do is write a new parser.
  8 | 
  9 | Field Roles
 10 | -----------
 11 | 
 12 | Each field in a screed database is assigned a role. These roles
 13 | describe what kind of information is stored in their field. Right now
 14 | there are only 4 different roles in a screed database: the text role,
 15 | the sliceable role, the indexed key role and the primary key role. All
 16 | roles are defined in the file: screed/DBConstants.py
 17 | 
 18 | The text role (DBConstants._STANDARD_TEXT) is the role most fields in
 19 | a database will have. This role tells screed that the associated field
 20 | is storing standard textual data. Nothing special.
 21 | 
 22 | The sliceable role (DBConstants._SLICEABLE_TEXT) is a role that can be
 23 | assigned to long sequence fields. screed's default FASTA parser
 24 | defines the 'sequence' field with the sliceable role. When screed
 25 | retrieves a field that has the sliceable role, it builds a special
 26 | data structure that supports slicing into the text.
 27 | 
 28 | The indexed key role (DBConstants._INDEXED_TEXT_KEY) is associated
 29 | with exactly one of the fields in a screed database. In screed's FASTA
 30 | and FASTQ parsers, this role is fulfilled by the 'name' field. This
 31 | field is required because it is the field screed tells sqlite to index
 32 | when creating the database and it is the field used for name look-ups
 33 | when querying a screed database.
 34 | 
 35 | The primary key role (DBConstants._PRIMARY_KEY_ROLE) is a role
 36 | automatically associated with the 'id' field in each database. This
 37 | field is always created with each screed database and always holds
 38 | this role. You as a user of screed won't need to worry about this one.
 39 | 
 40 | General Parsing Function Format
 41 | -------------------------------
 42 | 
 43 | create_db is the function central to the creation of screed
 44 | databases. This function accepts a file path, a tuple of field names
 45 | and roles, and an iterator function. The file path describes where the
 46 | screed database should go, the tuple contains the names of fields and
 47 | their associated roles and the iterator function yields records in a
 48 | dictionary format.
 49 | 
 50 | This sub-section describes general steps for preparing and using
 51 | screed with a custom sequence parser. Though they don't have to be,
 52 | future sequence parsers should be located in the seqparse.py file for
 53 | convenience.  These steps will be described in the context of working
 54 | from the Python shell.
 55 | 
 56 | First import the create_db function and the DBConstants module::
 57 | 
 58 |     >>> from screed import create_db, DBConstants
 59 | 
 60 | The create_db function handles the formatting of screed databases and
 61 | provides a simple interface for storing sequence data.
 62 | 
 63 | Next the database fields and roles must be specified. The fields tell
 64 | screed the names and order of the data fields inside each record. For instance,
 65 | let's say our new sequence has types 'name', 'bar', and 'baz', all text. The
 66 | tuple will be::
 67 | 
 68 |     >>> fields = (('name', DBConstants._INDEXED_TEXT_KEY),
 69 |                   ('bar', DBConstants._STANDARD_TEXT),
 70 |                   ('baz', DBConstants._STANDARD_TEXT))
 71 | 
 72 | Notice how 'name' is given the indexed key role and bar and baz are
 73 | given text roles? If, for instance, you know 'baz' fields can be very long
 74 | and you want to be able to retrieve slices of them, you could specify
 75 | fields as::
 76 | 
 77 |     >>> fields = (('name', DBConstants._INDEXED_TEXT_KEY),
 78 |                   ('bar', DBConstants._STANDARD_TEXT),
 79 |                   ('baz', DBConstants._SLICEABLE_TEXT))
 80 | 
 81 | All screed databases come with an 'id' field, which is a sequential
 82 | numbering order starting at 0 for the first record, 1 for the second, and
 83 | so on. The names and number of the other fields are arbitrary with one
 84 | restriction: one and only one of the fields must fulfill the indexed key role.
 85 | 
 86 | Next, you need to set up an iterator function that will return records in
 87 | a dictionary format. Have a look at the 'fastq_iter', 'fasta_iter', or
 88 | 'hava_iter' functions in the screed/fastq.py, screed/fasta.py, and
 89 | screed/hava.py files, respectively for examples on how to write one of these.
 90 | If you don't know what an iterator function is, the documentation on the
 91 | Python website gives a good description:
 92 | http://docs.python.org/library/stdtypes.html#iterator-types.
 93 | 
 94 | Once the iterator function is written, it needs to be instantiated. In the
 95 | context of the built-in parsing functions, this means opening a file and
 96 | passing the file handle to the iterator function::
 97 | 
 98 |     >>> seqfile = open('path_to_seq_file', 'rb')
 99 |     >>> iter_instance = myiter(seqfile)
100 | 
101 | Assuming that your iterator function is called 'myiter', this sets up an
102 | instance of it ready to use with create_db.
103 | 
104 | Now the screed database is created with one command::
105 | 
106 |     >>> create_db('path_to_screed_db', fields, iter_instance)
107 | 
108 | This saves the screed database at 'path_to_screed_db'. If instead you
109 | want the screed database created in the same directory and with a
110 | similar file name as the sequence file, it's OK to do this::
111 | 
112 |     >>> create_db('path_to_seq_file', fields, iter_instance)
113 | 
114 | create_db will just append '_screed' to the end of the file name and make
115 | a screed database at that file path so the original file won't be
116 | overwritten.
117 | 
118 | When you're done the sequence file should be closed::
119 | 
120 |     >>> seqfile.close()
121 | 
122 | Using the Built-in Sequence Iterator Functions
123 | ----------------------------------------------
124 | 
125 | This section shows how to use the 'fastq_iter' and 'fasta_iter' functions
126 | for returning records from a sequence file.
127 | 
128 | These functions both take a file handle as the only argument and then return
129 | a dictionary for each record in the file containing names of fields and
130 | associated data. These functions are primarily used in conjunction with
131 | the create_db() function, but they can be useful by themselves.
132 | 
133 | First, import the necessary module and open a text file containing sequences.
134 | For this example, the 'fastq_iter' function will be used::
135 | 
136 |     >>> import screed.fastq
137 |     >>> seqfile = open('path_to_seqfile', 'rb')
138 | 
139 | Now, the 'fastq_iter' can be instantiated and iterated over::
140 | 
141 |     >>> fq_instance = screed.fastq.fastq_iter(seqfile)
142 |     >>> for record in fq_instance:
143 |     ...     print(record.name)
144 | 
145 | That will print the name of every sequence in the file. If instead you want
146 | to accumulate the sequences::
147 | 
148 |     >>> sequences = []
149 |     >>> for record in fq_instance:
150 |     ...     sequences.append(record.sequence)
151 | 
152 | These iterators are the core of screed's sequence modularity. If there is
153 | a new sequence format you want screed to work with, all it needs is its
154 | own iterator.
155 | 
156 | Error checking in parsing methods
157 | ---------------------------------
158 | 
159 | The existing FASTA/FASTQ parsing functions contain some error
160 | checking, such as making sure the file can be opened and checking
161 | correct data is being read. Though screed doesn't enforce this, it is
162 | strongly recommended to include error checking code in your parser. To
163 | remain non-specific to one file sequence type or another, the
164 | underlying screed library can't contain error checking code of this
165 | kind. If errors are not detected by the parsing function, they will be
166 | silently included into the database being built and could cause
167 | problems much later when trying to read from the database.
168 | 


--------------------------------------------------------------------------------
/doc/dev/release-checklist.rst:
--------------------------------------------------------------------------------
  1 | .. vim: set filetype=rst
  2 | 
  3 | =====================
  4 | Release Documentation
  5 | =====================
  6 | 
  7 | 
  8 | Introduction
  9 | ============
 10 | 
 11 | This is the release documentation for releasing a new version of screed. This
 12 | document is meant for screed release managers. Michael R. Crusoe and C. Titus
 13 | Brown have released screed in the past. Jake Fenton is the first to release
 14 | screed using this checklist.
 15 | 
 16 | Getting Started
 17 | ===============
 18 | 
 19 | #. Create and activate an empty Python environment::
 20 | 
 21 |         mamba create -n screed-rc -y python=3.10 pip make setuptools_scm
 22 |         conda activate screed-rc
 23 |         python -m pip install -U pip
 24 |         python -m pip install -U virtualenv wheel tox-setuptools-version build
 25 | 
 26 | #. Start with a clean checkout::
 27 | 
 28 |         cd $(mktemp -d)
 29 |         git clone git@github.com:dib-lab/screed.git
 30 |         cd screed
 31 | 
 32 | #. Set the new version number and release candidate::
 33 | 
 34 |         new_version=1.1.0
 35 |         rc=rc1
 36 | 
 37 |    Tag the release candidate with the new version prefixed by the letter 'v'::
 38 | 
 39 |         git tag v${new_version}-${rc}
 40 |         git push --tags git@github.com:dib-lab/screed.git
 41 | 
 42 | #. Test the release candidate::
 43 | 
 44 |         cd ..
 45 |         virtualenv testenv1
 46 |         virtualenv testenv2
 47 |         virtualenv testenv3
 48 |         virtualenv testenv4
 49 | 
 50 |         # first we test the tag
 51 |         cd testenv1
 52 |         source bin/activate
 53 |         git clone --depth 1 --branch v${new_version}-${rc} \
 54 |                 https://github.com/dib-lab/screed.git
 55 |         cd screed
 56 |         make install-dependencies
 57 |         make install
 58 |         make test
 59 |         python -c 'import screed; print(screed.__version__)' # double-check version number
 60 | 
 61 | 
 62 |         # Test via pip
 63 |         cd ../../testenv2
 64 |         source bin/activate
 65 |         pip install -e \
 66 |                 git+https://github.com/dib-lab/screed.git@v${new_version}-${rc}#egg=screed
 67 |         cd src/screed
 68 |         make dist
 69 |         make install
 70 |         pip install pytest
 71 |         pytest screed 
 72 |         python -c 'import screed; print(screed.__version__)' # double-check version number
 73 |         cp dist/screed-*.tar.gz ../../../testenv3
 74 | 
 75 |         # test if the dist made in testenv2 is complete enough to build another
 76 |         # functional dist
 77 | 
 78 |         cd ../../../testenv3
 79 |         source bin/activate
 80 |         pip install pytest
 81 |         pip install screed*tar.gz
 82 |         python -c 'import screed; print(screed.__version__)'
 83 |         tar xzf screed*tar.gz
 84 |         cd screed*
 85 |         make dist
 86 |         make test
 87 | 
 88 | #. Do any final testing (acceptance tests, etc.). A good test is to install
 89 |    the new version of screed and then run the sourmash tests.
 90 | 
 91 | How to make a final release
 92 | ===========================
 93 | 
 94 | When you have a thoroughly tested release candidate, cut a release like so:
 95 | 
 96 | #. Delete the release candidate tag and push the tag updates to GitHub::
 97 | 
 98 |        cd ../../screed
 99 |        git tag -d v${new_version}-${rc}
100 |        git push --delete origin v${new_version}-${rc}
101 | 
102 | #. Create the final tag and publish the new release on PyPI (requires an
103 |    authorized account) ::
104 | 
105 |        git tag v${new_version}
106 |        git push --tags origin
107 |        make dist
108 |        twine upload dist/screed-${new_version}.tar.gz
109 | 
110 | #. Add the release on GitHub, using the tag you just pushed. Name it "Version
111 |    X.Y.Z" and copy/paste in the release notes.
112 | 
113 | #. Update Read the Docs to point to the new version. Visit
114 |    https://readthedocs.org/builds/screed/ and ‘Build Version: master’ to pick up
115 |    the new tag. Once that build has finished check the “Activate” box next to
116 |    the new version at https://readthedocs.org/dashboard/screed/versions/ under
117 |    “Choose Active Versions”. Finally change the default version at
118 |    https://readthedocs.org/dashboard/screed/advanced/ to the new version.
119 | 
120 | #. Delete any RC tags created:: 
121 |    
122 |        git tag -d v${new_version}-${rc}
123 |        git push origin :refs/tags/v${new_version}-${rc}
124 | 
125 | #. Tweet about the new release
126 | 
127 | #. Send email including the release notes to khmer@lists.idyll.org and
128 |    khmer-announce@lists.idyll.org
129 | 
130 | Notes on this document
131 | ======================
132 | This is the procedure for cutting a new release of screed. It has been adapted
133 | from the release documentation for the khmer project, found at
134 | http://khmer.readthedocs.org/en/v1.1/release.html.
135 | 
136 | 


--------------------------------------------------------------------------------
/doc/example.rst:
--------------------------------------------------------------------------------
 1 | ===============
 2 | screed examples
 3 | ===============
 4 | 
 5 | .. contents::
 6 | 
 7 | Basic Usage
 8 | ===========
 9 | 
10 | Load screed, index the database, and return a dictionary-like object:
11 | 
12 |  >>> import screed
13 |  >>> db = screed.read_fasta_sequences('../screed/tests/test.fa')
14 | 
15 | Get the list of sequence names, sort alphabetically, and look at the
16 | first one:
17 | 
18 |  >>> names = db.keys()
19 |  >>> names.sort()
20 |  >>> names[0]
21 |  u'ENSMICT00000000730'
22 | 
23 | Retrieve that record:
24 | 
25 |  >>> r = db[names[0]]
26 |  >>> print r.keys()
27 |  [u'description', u'id', u'name', u'sequence']
28 | 
29 | Print out the internal ID number and the name:
30 | 
31 |  >>> print r.id
32 |  13
33 |  >>> print r.name
34 |  ENSMICT00000000730
35 | 


--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
 1 | .. screed documentation master file, created by
 2 |    sphinx-quickstart on Wed Jun  6 16:32:37 2012.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | screed - short read sequence utils
 7 | ==================================
 8 | 
 9 | :Copyright: 2008, Michigan State University.
10 | :Copyright: 2015, The Regents of the University of California.
11 | :Authors: Alex Nolley, C. Titus Brown
12 | :Contact: ctb@msu.edu
13 | :License: BSD
14 | 
15 | Contents:
16 | 
17 | .. toctree::
18 |    :maxdepth: 2
19 | 
20 |    screed
21 |    example
22 | 
23 |    dev/index
24 |    release-notes/index
25 |    user/known-issues
26 | 
27 |    CODE_OF_CONDUCT
28 |    LICENSE
29 | 
30 | Indices and tables
31 | ==================
32 | 
33 | * :ref:`genindex`
34 | * :ref:`modindex`
35 | * :ref:`search`
36 | 


--------------------------------------------------------------------------------
/doc/release-notes/RELEASE-0.5.rst:
--------------------------------------------------------------------------------
 1 | ============
 2 | Release v0.5
 3 | ============
 4 | 
 5 | We are proud to announce the release of screed v0.5. screed is a database engine
 6 | capable of storing and retrieving short-read sequence data. screed is designed
 7 | to be fast and adaptable to different sequence file formats. This marks the
 8 | first release of screed which we consider stable and complete.
 9 | 
10 | Features:
11 |  - Read sequence data from FASTA/FASTQ files into screed databases
12 |  - Save screed databases back to FASTA/FASTQ files
13 |  - Lookup sequence data by index (offset) or name
14 |  - Native support for sequence substring slicing
15 |  - Convert between FASTA <-> FASTQ file formats
16 | 
17 | screed is written entirely in Python and uses the Sqlite database for backend
18 | storage. screed can be downloaded from the public git repository:
19 | http://github.com/acr/screed.git
20 | 
21 | screed is licensed under the BSD license which can be viewed in the
22 | doc/LICENSE.txt file.
23 | 


--------------------------------------------------------------------------------
/doc/release-notes/RELEASE-0.8.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Release v0.8
 3 | 
 4 | We are pleased to announce the release of Screed v0.8. Screed is a database
 5 | engine capable of storing and retrieving short-read sequence data and is
 6 | designed to be fast and adaptable to different sequence file formats.
 7 | 
 8 | This version of Screed contains developer documentation for contributing to the
 9 | Screed project and a code of conduct for interacting with other contributors
10 | and project maintainers. Documentation is available at
11 | http://screed.readthedocs.org/en/v0.8/
12 | 
13 | ## New items of note:
14 | 
15 | This release successfully installs and passes its unit tests on
16 | Ubuntu 14.04 and the latest release of Mac OS X 10 "Yosemite". It
17 | also passes the khmer acceptance tests as per the [eelpond testing
18 | protocol.](https://github.com/dib-lab/literate-resting/blob/master/kp/README.txt)
19 | 
20 | This release of screed has renamed the 'accuracy' attribute of read records to
21 | 'quality'; this API change will need to be adopted by all users wanting to
22 | upgrade to this version. Unlike the khmer project, Screed is not currently
23 | under semantic versioning. It will be with the 1.0 release.
24 | 
25 |  - Screed now has automatic compression detection via magic bit sniffing
26 |  for gzip and bzip2 compressed files (from @mr-c in dib-lab/khmer#432)
27 |  - Screed now supports streaming of uncompressed FASTA and FASTQ formatted
28 |  nucleotide sequence data. bzip2 compressed FASTA and FASTQ formatted
29 |  nucleotide sequence data can also be streamed but not gzip compressed
30 |  FASTA and FASTQ formatted nucleotide sequence data. (from @mr-c, see
31 |  dib-lab/khmer#633)
32 |  - Screed now has a Changelog, developer documentation and a code of conduct
33 |  (from @ctb, @mr-c, @bocajnotnef in dib-lab/khmer#625)
34 |  - Versions are now autogenerated using git tags via Versioneer (from
35 |  @bocajnotnef in cadceb5)
36 |  - Documentation is now autogenerated using Doxygen (from @mr-c in d8ed05b)
37 | 
38 | ## Notable bugs fixed/issues closed:
39 |  - A khmer script was not accepting reads on the stdin dib-lab/khmer#633
40 |  by @mr-c
41 |  - screed returning the wrong version and breaking dev installs
42 |  dib-lab/khmer#803 by @mr-c
43 | 
44 | 
45 | ## Known Issues
46 | 
47 | These are all pre-existing
48 | 
49 |  - Screed records cannot be sliced requiring un-Pythonic techniques to achieve
50 |  the same behavior. This will be included in a future release. This is being
51 |  tracked in dib-lab/khmer#768
52 |  - Screed self-tests do not use a temporary directory which causes
53 |  tests run from package-based installs to fail. This is being tracked in
54 |  dib-lab/khmer#748
55 |  - Screed does not support gzip file streaming. This is an issue with Python
56 |  2.x and will likely *not* be fixed in future releases. This is being tracked
57 |  in dib-lab/khmer#700
58 |  - Screed is overly tolerant of spaces in fast{a,q} which is against spec. This
59 |  is being tracked in dib-lab/khmer#108
60 | 
61 | ## Contributors
62 | 
63 | @bocajnotnef @mr-c @brtaylor92 @wrightmhw @kdmurray91 @luizirber @ctb
64 | 
65 | 


--------------------------------------------------------------------------------
/doc/release-notes/RELEASE-0.8.rst:
--------------------------------------------------------------------------------
 1 | Release v0.8
 2 | ============
 3 | 
 4 | We are pleased to announce the release of Screed v0.8. Screed is a
 5 | database engine capable of storing and retrieving short-read sequence
 6 | data and is designed to be fast and adaptable to different sequence file
 7 | formats.
 8 | 
 9 | This version of Screed contains developer documentation for contributing
10 | to the Screed project and a code of conduct for interacting with other
11 | contributors and project maintainers. Documentation is available at
12 | http://screed.readthedocs.org/en/v0.8/
13 | 
14 | New items of note:
15 | ------------------
16 | 
17 | This release successfully installs and passes its unit tests on Ubuntu
18 | 14.04 and the latest release of Mac OS X 10 "Yosemite". It also passes
19 | the khmer acceptance tests as per the `eelpond testing
20 | protocol. <https://github.com/dib-lab/literate-resting/blob/master/kp/README.txt>`__
21 | 
22 | This release of screed has renamed the 'accuracy' attribute of read
23 | records to 'quality'; this API change will need to be adopted by all
24 | users wanting to upgrade to this version. Unlike the khmer project,
25 | Screed is not currently under semantic versioning. It will be with the
26 | 1.0 release.
27 | 
28 | -  Screed now has automatic compression detection via magic bit sniffing
29 |    for gzip and bzip2 compressed files (from @mr-c in dib-lab/khmer#432)
30 | -  Screed now supports streaming of uncompressed FASTA and FASTQ
31 |    formatted nucleotide sequence data. bzip2 compressed FASTA and FASTQ
32 |    formatted nucleotide sequence data can also be streamed but not gzip
33 |    compressed FASTA and FASTQ formatted nucleotide sequence data. (from
34 |    @mr-c, see dib-lab/khmer#633)
35 | -  Screed now has a Changelog, developer documentation and a code of
36 |    conduct (from @ctb, @mr-c, @bocajnotnef in dib-lab/khmer#625)
37 | -  Versions are now autogenerated using git tags via Versioneer (from
38 |    @bocajnotnef in cadceb5)
39 | -  Documentation is now autogenerated using Doxygen (from @mr-c in
40 |    d8ed05b)
41 | 
42 | Notable bugs fixed/issues closed:
43 | ---------------------------------
44 | 
45 | -  A khmer script was not accepting reads on the stdin dib-lab/khmer#633
46 |    by @mr-c
47 | -  screed returning the wrong version and breaking dev installs
48 |    dib-lab/khmer#803 by @mr-c
49 | 
50 | Known Issues
51 | ------------
52 | 
53 | These are all pre-existing
54 | 
55 | -  Screed records cannot be sliced requiring un-Pythonic techniques to
56 |    achieve the same behavior. This will be included in a future release.
57 |    This is being tracked in dib-lab/khmer#768
58 | -  Screed self-tests do not use a temporary directory which causes tests
59 |    run from package-based installs to fail. This is being tracked in
60 |    dib-lab/khmer#748
61 | -  Screed does not support gzip file streaming. This is an issue with
62 |    Python 2.x and will likely *not* be fixed in future releases. This is
63 |    being tracked in dib-lab/khmer#700
64 | -  Screed is overly tolerant of spaces in fast{a,q} which is against
65 |    spec. This is being tracked in dib-lab/khmer#108
66 | 
67 | Contributors
68 | ------------
69 | 
70 | @bocajnotnef @mr-c @brtaylor92 @wrightmhw @kdmurray91 @luizirber @ctb
71 | 


--------------------------------------------------------------------------------
/doc/release-notes/RELEASE-0.9.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Release v0.9
 3 | 
 4 | We are pleased to announce the release of Screed v0.9. Screed is a database
 5 | engine capable of storing and retrieving short-read sequence data and is
 6 | designed to be fast and adaptable to different sequence file formats.
 7 | 
 8 | This version of Screed features Python 3 syntax with compatibility with Python 2. Additional changes have broken backwards compatibility in several small ways in preparation for our 1.0 release and adoption of strict semantic versioning from there on out.
 9 | 
10 | It is also the first release since our move to the University of California, Davis and also under our new name, the Lab for Data Intensive Biology.
11 | 
12 | Documentation is available at http://screed.readthedocs.org/en/v0.9/
13 | 
14 | ## New items of note:
15 | 
16 | - Now a primarily Python 3 codebase with Python 2 compatibility. https://github.com/dib-lab/screed/pull/41 @luizirber & @mr-c
17 | 
18 | - Tests now correctly run using temporary directories and the test data is now shipped allowing the tests to be run after installation. https://github.com/dib-lab/screed/pull/30 @bocajnotnef https://github.com/dib-lab/screed/pull/40 @mr-c
19 | - The private method `screed/screedRecord._screed_record_dict()` has been renamed to `screed.screedRecord.Record()`. This is **not** a backwards compatible change. https://github.com/dib-lab/screed/pull/35 @sguermond
20 | - `screed.open()` now accepts `-` as a synonym for STDIN and is now an (optional) context manager. It no longer defaults to parsing out a separate description from the name. The description field will be removed altogether from the next release. This is **not** a backwards compatible change. https://github.com/dib-lab/screed/pull/36 @anotherthomas  https://github.com/dib-lab/screed/pull/39 https://github.com/dib-lab/screed/pull/41 @luizirber https://github.com/dib-lab/screed/pull/43 @ctb 
21 | - The FASTQ parser was improved and it no longer hangs in the presence of empty lines. https://github.com/dib-lab/screed/pull/38 @proteasome
22 | - Screed records now slice correctly https://github.com/dib-lab/screed/pull/41 @wrightmhw @luizirber 
23 | 
24 | 
25 | ## Other bugs fixed/issues closed:
26 | 
27 | - Release notes are now a part of the documentation. https://github.com/dib-lab/screed/pull/33 @bocajnotnef 
28 | - A test was made more robust to prevent hangs. https://github.com/dib-lab/screed/pull/37 @anotherthomas 
29 | 
30 | ## Known Issues
31 | 
32 | These are all pre-existing
33 | 
34 |  - Screed does not support gzip file streaming. This is an issue with Python 2.x and will likely *not* be fixed in future releases. This is being tracked in ged-lab/khmer#700
35 |  - Screed is overly tolerant of spaces in fast{a,q} which is against spec. This is being tracked in ged-lab/khmer#108
36 |  
37 | ## Contributors
38 | 
39 | @luizirber @mr-c @bocajnotnef @ctb \*@proteasome \*@anotherthomas \*@sguermond 
40 | 
41 | \* Indicates new contributors
42 | 


--------------------------------------------------------------------------------
/doc/release-notes/RELEASE-0.9.rst:
--------------------------------------------------------------------------------
 1 | Release v0.9
 2 | ============
 3 | 
 4 | We are pleased to announce the release of Screed v0.9. Screed is a
 5 | database engine capable of storing and retrieving short-read sequence
 6 | data and is designed to be fast and adaptable to different sequence file
 7 | formats.
 8 | 
 9 | This version of Screed features Python 3 syntax with compatibility with
10 | Python 2. Additional changes have broken backwards compatibility in
11 | several small ways in preparation for our 1.0 release and adoption of
12 | strict semantic versioning from there on out.
13 | 
14 | It is also the first release since our move to the University of
15 | California, Davis, and also the first under our new name, the Lab for
16 | Data Intensive Biology.
17 | 
18 | Documentation is available at http://screed.readthedocs.org/en/v0.9/
19 | 
20 | New items of note:
21 | ------------------
22 | 
23 | -  Now a primarily Python 3 codebase with Python 2 compatibility.
24 |    https://github.com/dib-lab/screed/pull/41 @luizirber & @mr-c
25 | 
26 | -  Tests now correctly run using temporary directories and the test data
27 |    is now shipped allowing the tests to be run after installation.
28 |    https://github.com/dib-lab/screed/pull/30 @bocajnotnef
29 |    https://github.com/dib-lab/screed/pull/40 @mr-c
30 | -  The private method ``screed.screedRecord._screed_record_dict()`` has
31 |    been renamed to ``screed.screedRecord.Record()``. This is **not** a
32 |    backwards compatible change.
33 |    https://github.com/dib-lab/screed/pull/35 @sguermond
34 | -  ``screed.open()`` now accepts ``-`` as a synonym for STDIN and is now
35 |    an (optional) context manager. It no longer defaults to parsing out a
36 |    separate description from the name. The description field will be
37 |    removed altogether from the next release. This is **not** a backwards
38 |    compatible change. https://github.com/dib-lab/screed/pull/36
39 |    @anotherthomas https://github.com/dib-lab/screed/pull/39
40 |    https://github.com/dib-lab/screed/pull/41 @luizirber
41 |    https://github.com/dib-lab/screed/pull/43 @ctb
42 | -  The FASTQ parser was improved and it no longer hangs in the presence
43 |    of empty lines. https://github.com/dib-lab/screed/pull/38 @proteasome
44 | -  Screed records now slice correctly
45 |    https://github.com/dib-lab/screed/pull/41 @wrightmhw @luizirber
46 | 
47 | Other bugs fixed/issues closed:
48 | -------------------------------
49 | 
50 | -  Release notes are now a part of the documentation.
51 |    https://github.com/dib-lab/screed/pull/33 @bocajnotnef
52 | -  A test was made more robust to prevent hangs.
53 |    https://github.com/dib-lab/screed/pull/37 @anotherthomas
54 | 
55 | Known Issues
56 | ------------
57 | 
58 | These are all pre-existing
59 | 
60 | -  Screed does not support gzip file streaming. This is an issue with
61 |    Python 2.x and will likely *not* be fixed in future releases. This is
62 |    being tracked in ged-lab/khmer#700
63 | -  Screed is overly tolerant of spaces in fast{a,q} which is against
64 |    spec. This is being tracked in ged-lab/khmer#108
65 | 
66 | Contributors
67 | ------------
68 | 
69 | @luizirber @mr-c @bocajnotnef @ctb \*@proteasome \*@anotherthomas
70 | \*@sguermond
71 | 
72 | \* Indicates new contributors
73 | 


--------------------------------------------------------------------------------
/doc/release-notes/index.rst:
--------------------------------------------------------------------------------
 1 | .. vim: set filetype=rst:
 2 | 
 3 | Release notes for past versions of screed
 4 | =========================================
 5 | 
 6 | Contents:
 7 | 
 8 | .. toctree::
 9 |    :maxdepth: 1
10 | 
11 |    RELEASE-0.5
12 |    RELEASE-0.8
13 |    RELEASE-0.9
14 | 


--------------------------------------------------------------------------------
/doc/run-doctests.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | import doctest
 3 | import sys
 4 | 
 5 | for filename in sys.argv[1:]:
 6 |     print '... running doctests on', filename
 7 |     doctest.testfile(filename)
 8 | 
 9 | print '*** SUCCESS ***'
10 | 


--------------------------------------------------------------------------------
/doc/screed.rst:
--------------------------------------------------------------------------------
  1 | ===========
  2 | User Manual
  3 | ===========
  4 | 
  5 | .. note::
  6 | 
  7 |    Some doctests are included in :doc:`example`. The examples in this
  8 |    document are meant for human consumption only. They will not work in
  9 |    doctests!
 10 | 
 11 | screed parses FASTA and FASTQ files, generates databases, and lets you query
 12 | these databases. Values such as sequence name, sequence description, sequence
 13 | quality, and the sequence itself can be retrieved from these databases.
 14 | 
 15 | Installation
 16 | ============
 17 | 
 18 | The following software packages are required to run screed:
 19 | 
 20 | * Python 2 (2.7) or Python 3 (3.3 or newer)
 21 | * pytest (only required for running tests)
 22 | 
 23 | Use pip to download, and install Screed and its dependencies::
 24 | 
 25 |     pip install screed
 26 | 
 27 | To run the optional tests type::
 28 | 
 29 |     python -m screed.tests
 30 | 
 31 | Command-line Quick Start
 32 | ========================
 33 | 
 34 | Creating a database
 35 | -------------------
 36 | 
 37 | .. code::
 38 | 
 39 |     $ screed db <fasta/fastq file>
 40 | 
 41 | Dumping a database to a file
 42 | ----------------------------
 43 | 
 44 | .. code::
 45 | 
 46 |     $ screed dump_fasta <screed database file> <fasta output>
 47 |     $ screed dump_fastq <screed database file> <fastq output>
 48 | 
 49 | If no output file is provided, sequences are written to the terminal (stdout) by
 50 | default.
 51 | 
 52 | Python Quick Start
 53 | ==================
 54 | 
 55 | Reading FASTA/FASTQ files
 56 | -------------------------
 57 | 
 58 |    >>> import screed
 59 |    >>> with screed.open(filename) as seqfile:
 60 |    ...     for read in seqfile:
 61 |    ...         print(read.name, read.sequence)
 62 | 
 63 | Here, :code:`filename` can be a FASTA or FASTQ file, and can be uncompressed,
 64 | gzip-compressed, or bzip2-compressed. screed natively supports FASTA and FASTQ
 65 | database creation. If your sequences are in a different format, see the
 66 | developer documentation on :doc:`dev/parsers`.
 67 | 
 68 | Creating a database
 69 | -------------------
 70 | 
 71 |     >>> import screed
 72 |     >>> screed.make_db('screed/tests/test-data/test.fa')
 73 | 
 74 | This loads a FASTA file :code:`screed/tests/test-data/test.fa` into a screed database
 75 | named :code:`screed/tests/test-data/test.fa_screed`. A couple of things to note:
 76 | 
 77 | * The screed database is independent of the text file from which it was derived,
 78 |   so moving, renaming or deleting :code:`screed/tests/test-data/test.fa` will not affect
 79 |   the newly created database.
 80 | * The :code:`make_db` function inferred the file type as FASTA automatically.
 81 |   The :code:`read_fasta_sequences()` and :code:`read_fastq_sequences()`
 82 |   functions are available if you'd prefer to be explicit.
 83 | 
 84 |     >>> screed.read_fasta_sequences('screed/tests/test-data/test.fa')
 85 |     >>> screed.read_fastq_sequences('screed/tests/test-data/test.fastq')
 86 | 
 87 | Opening a database
 88 | ------------------
 89 | 
 90 | The class :code:`ScreedDB` is used to read screed databases, regardless of what
 91 | file format they were derived from (FASTA/FASTQ/hava/etc.). One reader to rule
 92 | them all!
 93 | 
 94 | From the Python prompt, import the ScreedDB class and load some databases::
 95 | 
 96 |     >>> from screed import ScreedDB
 97 |     >>> fadb = ScreedDB('screed/tests/test-data/test.fa')
 98 |     >>> fqdb = ScreedDB('screed/tests/test-data/test.fastq')
 99 | 
100 | Notice how you didn't need to write the '_screed' at the end of the file names?
101 | screed automatically adds that to the file name if you didn't.
102 | 
103 | Database dictionary interface
104 | -----------------------------
105 | 
106 | Since screed emulates a read-only dictionary interface, any methods that don't
107 | modify a dictionary are supported::
108 | 
109 |     >>> fadb.keys()
110 |     >>> fqdb.keys()
111 | 
112 | Each record in the database contains 'fields' such as name and sequence
113 | information. If the database was derived from a FASTQ file, quality and optional
114 | annotation strings are included. Conversely, FASTA-derived databases have a
115 | description field.
116 | 
117 | To retrieve the names of records in the database::
118 | 
119 |     >>> names = fadb.keys()
120 | 
121 | The size of the databases (number of sequence records) is easily found::
122 | 
123 |     >>> len(fadb)
124 |     22
125 |     >>> len(fqdb)
126 |     125
127 | 
128 | Retrieving records from a database
129 | ----------------------------------
130 | 
131 | A record is the standard container unit in screed. Each has *fields* that vary
132 | slightly depending on what kind of file the database was derived from. For
133 | instance, a FASTQ-derived screed database has an id, a name, a quality score and
134 | a sequence. A FASTA-derived screed database has an id, name, description and a
135 | sequence.
136 | 
137 | Retrieving entire records::
138 | 
139 |     >>> records = [r for r in fadb.itervalues()]
140 | 
141 | Each record is a dictionary of fields. The names of fields are keys into this
142 | dictionary with the actual information as values. For example::
143 | 
144 |     >>> record = fadb[fadb.keys()[0]]
145 |     >>> index = record['id']
146 |     >>> name = record['name']
147 |     >>> description = record['description']
148 |     >>> sequence = record['sequence']
149 | 
150 | What this does is retrieve the first record object in the screed database, then
151 | retrieve the index, name, description and sequence from the record object using
152 | standard dictionary key -> value pairs.
153 | 
154 | Retrieving partial sequences (slicing)
155 | --------------------------------------
156 | 
157 | screed supports the concept of retrieving a *slice* or a subset of a sequence
158 | string. The motivation is speed: if you have a database entry with a very long
159 | sequence string but only want a small portion of the string, it is faster to
160 | retrieve only the portion than to retrieve the entire string and then perform
161 | standard Python string slicing.
162 | 
163 | By default, screed's FASTA database creator sets up the :code:`sequence` column
164 | to support slicing. For example, if you have an entry with name :code:`someSeq`
165 | which has a 10K long sequence, and you want a slice of the sequence spanning
166 | positions 4000 to 4080::
167 | 
168 |     >>> seq = db['someSeq'].sequence
169 |     >>> slice = seq[4000:4080]
170 | 
171 | This is much faster than say::
172 | 
173 |     >>> seq = str(db['someSeq'].sequence)
174 |     >>> slice = seq[4000:4080]
175 | 
176 | Because deep down, less information is being read off the disk. The :code:`str()`
177 | method above causes the entire sequence to be retrieved as a string. Then Python
178 | slicing is done on the string :code:`seq` and the subset stored in
179 | :code:`slice`.
180 | 
181 | Retrieving records *via* index
182 | ------------------------------
183 | 
184 | Sometimes you don't care what the name of a sequence is; you're only interested
185 | in its position in the database. In these cases, retrieval via index is the
186 | method you'll want to use::
187 | 
188 |     >>> record = fqdb.loadRecordByIndex(5)
189 | 
190 | An index is like an offset into the database. The order records were kept in the
191 | FASTA or FASTQ file determines the index in their resulting screed database. The
192 | first record in a sequence file will have an index of 0, the second, an index of
193 | 1 and so on.
194 | 
195 | File Formats As Understood By Screed
196 | ====================================
197 | 
198 | While the screed database remains non-specific to file formats, the included
199 | FASTA and FASTQ parsers expect specific formats. These parsers attempt to handle
200 | the most common attributes of sequence files, though they can not support all
201 | features.
202 | 
203 | FASTQ
204 | -----
205 | 
206 | The FASTQ parsing function is :code:`read_fastq_sequences()` and is located in
207 | the screed module.
208 | 
209 | The first line in a record must begin with '@' and is followed by a record
210 | identifier (a name). An optional annotations string may be included after a
211 | space on the same line.
212 | 
213 | The second line begins the sequence line(s) which may be line wrapped. screed
214 | defines no limit on the length of sequence lines and no limit on how many
215 | sequence lines a record may contain.
216 | 
217 | After the sequence line(s) comes a '+' character on a new line. Some FASTQ
218 | formats require the first line to be repeated after the '+' character, but since
219 | this adds no new information to the record, :code:`read_fastq_sequences()` will
220 | ignore this if it is included.
221 | 
222 | The quality line(s) is last. Like the sequence line(s) this may be line wrapped.
223 | :code:`read_fastq_sequences()` will raise an exception if the quality and
224 | sequence strings are of unequal length. screed performs no checking for valid
225 | quality scores.
226 | 
227 | FASTA
228 | -----
229 | 
230 | The FASTA parsing function is :code:`read_fasta_sequences()` and is also
231 | located in the screed module.
232 | 
233 | The first line in a record must begin with '>' and is followed with the
234 | sequence's name and an optional description. If the description is included, it
235 | is separated from the name with a space. Note that though the FASTA format
236 | doesn't require named records, screed does. Without a unique name, screed can't
237 | look up sequences by name.
238 | 
239 | The second line begins the line(s) of sequence. Like the FASTQ parser,
240 | :code:`read_fasta_sequences()` allows any number of lines of any length.
241 | 
242 | FASTA <-> FASTQ Conversion
243 | ==========================
244 | 
245 | As an extra nicety, screed can convert FASTA files to FASTQ and back again.
246 | 
247 | FASTA to FASTQ
248 | --------------
249 | 
250 | The function used for this process is called 'ToFastq' and is located
251 | in the screed module. It takes the path to a screed database as the
252 | first argument and a path to the desired FASTQ file as the second
253 | argument. There is also a shell interface if the screed module is in
254 | your PYTHONPATH::
255 | 
256 |     $ python -m screed dump_fastq <path to fasta db> [ <converted fastq file> ]
257 | 
258 | The FASTA name attribute is directly dumped from the file. The
259 | sequence attribute is also dumped pretty much directly, but is line
260 | wrapped to 80 characters if it is longer.
261 | 
262 | Any description line in the FASTA database is stored as a FASTQ annotation
263 | string with no other interpretation done.
264 | 
265 | Finally, as there is no quality or quality score in a FASTA file, a
266 | default one is generated. The generation of the quality follows the
267 | Sanger FASTQ conventions. The score is 1 (ASCII: '"') meaning a
268 | probability of about 79% that the read is incorrect (roughly a 1 in 5
269 | chance that it is correct). This PHRED quality score is calculated from the
270 | Sanger format: Q = -10 log10(p) where p is the probability of an incorrect
271 | read. Obviously this is a very rough way of providing a quality score
272 | and it is only intended to fill in the requirements of a FASTQ
273 | file. Any application needing a true measurement of the quality
274 | should not rely on this automatic conversion.
275 | 
276 | FASTQ to FASTA
277 | --------------
278 | 
279 | The function used for this process is called 'ToFasta' and is located
280 | in the screed module. It takes the path to a screed database as the
281 | first argument and a path to the desired FASTA file as the second
282 | argument. Like the ToFastq function before, there is a shell interface
283 | to ToFasta if the screed module is in your PYTHONPATH::
284 | 
285 |     $ python -m screed dump_fasta <path to fastq db> [ <converted fasta file> ]
286 | 
287 | As above, the name and sequence attributes are directly dumped from
288 | the FASTQ database to the FASTA file with the sequence line wrapping
289 | to 80 characters.
290 | 
291 | If it exists, the FASTQ annotation tag is stored as the FASTA description tag.
292 | As there is no equivalent in FASTA, the FASTQ quality score is ignored.
293 | 


--------------------------------------------------------------------------------
/doc/user/known-issues.rst:
--------------------------------------------------------------------------------
 1 | .. vim: set filetype=rst:
 2 | 
 3 | ============
 4 | Known Issues
 5 | ============
 6 | 
 7 | This document details the known issues in the current release of screed. All
 8 | issues for screed are tracked at https://github.com/dib-lab/khmer/labels/screed
 9 | 
10 | List of known issues
11 | ====================
12 | 
13 | Screed does not support gzip file streaming. This is an issue
14 | with Python 2.x and will likely *not* be fixed in future
15 | releases. https://github.com/dib-lab/khmer/issues/700
16 | 
17 | Screed is overly tolerant of spaces in fast{q,a} which is against
18 | spec. https://github.com/dib-lab/khmer/issues/108
19 | 


--------------------------------------------------------------------------------
/legacy/ChangeLog:
--------------------------------------------------------------------------------
  1 | 2016-11-14  Daniel Standage  <daniel.standage@gmail.com>
  2 | 
  3 |    * screed/{__init__.py,fasta.py,fastq.py,openscreed.py}: Remove implementation
  4 |      (and related references) of Writer classes.
  5 |    * screed/tests/{test_open.py,test_open_cm.py}: Remove Writer tests.
  6 |    * screed/{screedRecord.py,{tests/test_{fasta,fastq}.py}}: New write_fastx
  7 |      implementation and associated tests.
  8 | 
  9 | 2016-11-14  Luiz Irber  <khmer@luizirber.org>
 10 | 
 11 |    * Makefile,doc/dev/release-checklist.rst,doc/screed.rst,jenkins-build.sh,
 12 |      pytest.ini, setup.{cfg,py}, tox.ini, screed/tests/{__main__,havaGen,
 13 |      screed_tst_utils,test_pygr_api, test_streaming}: Replace nose and adapt
 14 |      for pytest.
 15 | 
 16 | 2016-10-13  Daniel Standage  <daniel.standage@gmail.com>
 17 | 
 18 |    * .travis.yml: Reduce the size of the CI build.
 19 | 
 20 | 2016-10-07  Luiz Irber  <khmer@luizirber.org>
 21 | 
 22 |    * screed/f{a,q}dbm.py: Fix import errors on Python 3.
 23 |    * screed/tests/test_shell.py: check for return code and rewrite the test to
 24 |    work more like the expected usage in the shell.
 25 | 
 26 | 2016-10-06  Luiz Irber  <khmer@luizirber.org>
 27 | 
 28 |    * tox.ini: Use codecov for coverage reports, add Python 3.5 to builds.
 29 |    * .travis.yml: Activate Python 3.5 build.
 30 |    * .github/{CONTRIBUTING,PULL_REQUEST_TEMPLATE}.md: Add GitHub templates.
 31 |    * Makefile: Throw an error If there are pep8 warnings.
 32 |    * doc/dev/coding-guidelines-and-review.rst: Update checklist
 33 |    * screed/{dna,openscreed,tests/test_attriberror}.py: Fix pep8 warnings.
 34 | 
 35 | 2016-10-04  Luiz Irber  <khmer@luizirber.org>
 36 | 
 37 |    * screed/screedRecord.py: Implement comparison using total_ordering
 38 |    decorator from functools.
 39 |    * screed/tests/test_attriberror.py: Fix syntax errors for Python 3 and
 40 |    remove tests for not implemented methods (they are implemented now).
 41 | 
 42 | 2016-06-10  Titus Brown  <titus@idyll.org>
 43 | 
 44 |    * screed/dna.py: Fix reverse complement calculation for Python 2.7
 45 | 
 46 | 2015-06-22  Jacob Fenton  <bocajnotnef@gmail.com>
 47 | 
 48 |    * screed/tests/test_attriberror.py: added tests to check screed db attribute
 49 |    exception throwing
 50 |    * screed/screedRecord.py: removed __cmp__ function, explicitly disallowed
 51 |    all rich comparator functions that aren't == or !=
 52 | 
 53 | 2015-06-10  Michael R. Crusoe  <crusoe@ucdavis.edu>
 54 | 
 55 |    * doc/user/known-issues.rst: removed two fixed issues
 56 |    * doc/screed.rst: updated install & test instructions
 57 |    * doc/screed.html: removed un-needed file
 58 | 
 59 | 2015-06-05  Titus Brown  <titus@idyll.org>
 60 | 
 61 |    * screed/{fasta.py,fastq.py,seqparse.py}: Set parse_description default
 62 |    to False.
 63 |    * screed/tests/test*.py: updated tests appropriately.
 64 | 
 65 | 2015-06-05  Luiz Irber  <screed@luizirber.org>
 66 | 
 67 |    * screed/screedRecord.py: Simplify implementation of record slicing.
 68 |    * screed/tests/test_fast{a,q}.py: Loop over distinct slices during test.
 69 | 
 70 | 2015-06-05  Michael Wright  <wrigh517@msu.edu>
 71 | 
 72 |    * screed/screedRecord.py: Allow slicing of screed records to fix issue #768
 73 | 
 74 | 2015-06-05  en zyme  <en_zyme@outlook.com>
 75 | 
 76 |    * screed/tests/fastq.py: check for empty line in two places
 77 | 
 78 | 2015-05-29  Luiz Irber  <screed@luizirber.org>
 79 | 
 80 |    * screed/openscreed.py: Add missing "close" method to context manager.
 81 | 
 82 | 2015-05-27  Michael R. Crusoe  <mcrusoe@msu.edu>
 83 | 
 84 |    * MANIFEST.in: ship the recently relocated test data, fixed reference to
 85 |    renamed LICENSE file
 86 |    * doc/dev/CODE_OF_CONDUCT.rst: drop unused symlink
 87 |    * doc/dev/release-checklist.rst: fix line wrap
 88 | 
 89 | 2015-05-12  Luiz Irber  <screed@luizirber.org>
 90 | 
 91 |    * screed/openscreed.py: Implement open as a context manager, keep backward
 92 |    compatibility.
 93 |    * screed/tests/test_open_cm.py: Add same tests as test_open.py, but using
 94 |    a context manager to make sure file is closed after being used.
 95 | 
 96 | 2015-04-15  Thomas Fenzl  <thomas.fenzl@gmx.net>
 97 | 
 98 |    * screed/tests/screed_tst_utils.py: removed unnecessary import
 99 |    * screed/tests/test_streaming.py: changed execution order to handle
100 |    missing import files better
101 |    * screed/openscreed.py: pylint-ified
102 | 
103 | 2015-04-15  Thomas Fenzl  <thomas.fenzl@gmx.net>
104 | 
105 |    * Makefile: added setup.py develop to test goal
106 |    * screed/openscreed.py,screed/tests/test_open.py: added handling of '-'
107 | 
108 | 2015-04-09  Sarah Guermond  <sarah.guermond@gmail.com>
109 | 
110 |    * screed/screedRecord.py: renamed _screed_record_dict() to Record()
111 |    * screed/__init__.py: added import for Record
112 |    * screed/fasta.py: changed _screed_record_dict() to Record()
113 |    * screed/fastq.py: changed _screed_record_dict() to Record()
114 | 
115 | 2015-04-09  Jacob Fenton  <bocajnotnef@gmail.com>
116 | 
117 |    * doc/dev/release-checklist.txt: added "making final release" notes
118 |    * Makefile: copied over @mr-c's md-to-rst release notes conversion target
119 |    * doc/dev/release-notes/RELEASE-0.8.txt: added rst version of release notes
120 |    for sphinx
121 |    * doc/dev/release-notes/index.txt: added rst version of 0.8 release notes to
122 |    toctree
123 | 
124 | 2015-04-07  Jacob Fenton  <bocajnotnef@gmail.com>
125 | 
126 |    * screed/tests/test_{dictionary, fasta, fasta_recover, fastq, fastq_recover,
127 |    hava_methods, shell}.py: changed tests to use tempdirs
128 |    * screed/tests/screed_tst_utils.py: copied in khmer test utils
129 |    * screed/tests/{empty.fa, test-whitespace.fa, test.fa, test.fa.bz2,
130 |    test.fa.gz, test.fa.zip, test.fastq, test.fastq.bz2, test.hava}: moved test
131 |    data to screed/tests/test-data/ directory
132 | 
133 | 2015-04-04  Jacob Fenton  <bocajnotnef@gmail.com>
134 | 
135 |    * doc/dev/release{.txt,-checklist.txt}: renamed/restored release.txt to
136 |    release-checklist.txt
137 | 
138 | 2015-03-06  Kevin Murray  <spam@kdmurray.id.au>
139 | 
140 |    * screed/screedRecord.py: Fix a typo in a try: except: block.
141 |    s/AttributError/AttributeError/
142 | 
143 | 2015-02-23  Gabriel Pratt  <gpratt@ucsd.edu>
144 | 
145 |    * Fixed Issue 705 len(read) != len(read.sequence)
146 | 
147 | 2015-02-23  Michael R. Crusoe  <mcrusoe@msu.edu>
148 | 
149 |    * Doxyfile.in: make documentation generation reproducible; removed
150 |    timestamp
151 | 
152 | 2015-02-23  Michael R. Crusoe  <mcrusoe@msu.edu>
153 | 
154 |    * doc/dev/release.txt: Fix formatting
155 |    * MANIFEST.in: include the MANIFEST.in template, the license and other
156 |    files
157 |    * versioneer.py,screed/{__init__,_version}.py: upgrade versioneer to 0.13
158 | 
159 | 2015-02-23  Michael R. Crusoe  <mcrusoe@msu.edu>
160 | 
161 |    * setup.py: work around versioneer bug:
162 |    https://github.com/warner/python-versioneer/issues/52
163 | 
164 | 2014-12-07  Michael R. Crusoe  <mcrusoe@msu.edu>
165 | 
166 |    * Initial jenkins-build.sh
167 | 
168 | 2014-12-03  Jacob Fenton  <bocajnotnef@gmail.com>
169 | 
170 |    * ChangeLog: updated to include major revisions since 0.7.1
171 |    * CODE_OF_CONDUCT: copied in code of conduct from khmer project
172 |    * docs/dev/{CODE_OF_CONDUCT, coding-guidelines-and-review, index,
173 |    releases}.txt, docs/index.txt: added screed dev docs
174 |    * setup.py, .gitattributes, __init__.py, _version.py, versioneer.py:
175 |    installed versioneer version naming system
176 |    * MANIFEST.in: now includes versioneer files and empty testing file
177 | 
178 | 2014-11-02  Michael R. Crusoe  <mcrusoe@msu.edu>
179 | 
180 |    * Doxyfile, Makefile: added Doxygen support, coverage & pylint make targets
181 | 
182 | 2014-10-27  Ben Taylor  <taylo886@msu.edu>
183 | 
184 |    * benchmarks/fqGen.py, benchmarks/fqToFaConvert.py,
185 |    benchmarks/mysql/mysqlCreateTimeit.py, benchmarks/screedCreateTimeit.py,
186 |    benchmarks/pgres/pgresCreateTimeit.py, bigtests/__init__.py,
187 |    doc/screed.html, doc/screed.txt, screed/conversion.py, screed/fastq.py,
188 |    screed/tests/test_fastq.py: Changed all uses of "accuracy" to "quality"
189 |    * screed/tests/test_dna.py: Added basic test coverage for screed/dna.py
190 | 
191 | 2014-05-16  Michael R. Crusoe  <mcrusoe@msu.edu>
192 | 
193 |    * screed/openscreed.py: added sniffing of compression types, including zip
194 | 


--------------------------------------------------------------------------------
/legacy/jenkins-build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Jenkins build driver: creates a virtualenv in .env, activates it and
 3 | # then runs the project's make targets listed below in order.
 4 | 
 5 | # Prefer an explicit python2 interpreter when one is on PATH.
 6 | if type python2 >/dev/null 2>&1
 7 | then
 8 |     PYTHON_EXECUTABLE=$(which python2)
 9 | else
10 |     PYTHON_EXECUTABLE=$(which python)
11 | fi
12 | # Quoted so an interpreter path containing spaces stays a single argument.
13 | virtualenv -p "${PYTHON_EXECUTABLE}" .env
14 | 
15 | . .env/bin/activate
16 | make install-dependencies > install_dependencies.out
17 | make develop
18 | make coverage.xml
19 | make tests.xml
20 | # doxygen is optional: only run its target when the tool is installed.
21 | if type doxygen >/dev/null 2>&1
22 | then
23 |         # Redirect stdout first and stderr second so that both streams
24 |         # end up in doxygen.out.
25 |         make doxygen > doxygen.out 2>&1
26 | fi
27 | make pylint_report.txt
28 | make pep8_report.txt
29 | # sloccount is optional as well.
30 | if type sloccount >/dev/null 2>&1
31 | then
32 |         make sloccount.sc
33 | fi
34 | 
35 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = [
 3 |     "setuptools >= 48",
 4 |     "setuptools_scm[toml] >= 4, <6",
 5 |     "setuptools_scm_git_archive",
 6 |     "wheel >= 0.29.0",
 7 | ]
 8 | build-backend = 'setuptools.build_meta'
 9 | 
10 | [tool.setuptools_scm]
11 | write_to = "screed/version.py"
12 | git_describe_command = "git describe --dirty --tags --long --match v* --first-parent"
13 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | # If you change anything in addopts,
3 | # don't forget to update screed/tests/__main__.py too!
4 | addopts = -m "not known_failing" -v
5 | testpaths = screed/tests
6 | 


--------------------------------------------------------------------------------
/screed/DBConstants.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2008, Michigan State University.
 2 | 
 3 | """
 4 | Defines constant identifier strings that are shared by multiple
 5 | files throughout screed.
 6 | """
 7 | 
 8 | # Name of the table holding information about the rest of the db
 9 | _SCREEDADMIN = 'SCREEDADMIN'
10 | 
11 | # Names of the _SCREEDADMIN columns
12 | _FIELDNAME = 'FIELDNAME'
13 | _ROLENAME = 'ROLE'
14 | _PRIMARY_KEY = 'id'
15 | 
16 | # Names of the roles
17 | _STANDARD_TEXT = 'STANDARDATTR'
18 | _SLICEABLE_TEXT = 'SLICEABLEATTR'
19 | _INDEXED_TEXT_KEY = 'TEXTKEYATTR'
20 | _PRIMARY_KEY_ROLE = 'INTKEYATTR'
21 | 
22 | # Name of the table holding sequence information
23 | _DICT_TABLE = 'DICTIONARY_TABLE'
24 | 
25 | # The file extension given to all screed databases
26 | fileExtension = '_screed'
27 | 


--------------------------------------------------------------------------------
/screed/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2008, Michigan State University.
 2 | # Copyright (c) 2021, University of California
 3 | 
 4 | """
 5 | screed is a database tool useful for retrieving arbitrary kinds of sequence
 6 | data through an on-disk database that emulates a read-only Python dictionary.
 7 | 
 8 | For opening a screed database, the 'ScreedDB' class is used. This class
 9 | accepts a string file path to a pre-created screed database. Read-only
10 | dictionary methods are implemented here.
11 | 
12 | For creating a screed database, the 'create_db' function is used. This
13 | function accepts an iterator as an argument which will yield records
14 | from its respective sequence file. create_db will sequentially pull
15 | records from the iterator, writing them to disk in a screed database
16 | until the iterator is done.
17 | 
18 | Automatic ways for parsing FASTA and FASTQ files are accessed through
19 | the read_fast*_sequences functions. These parse the given sequence
20 | file into a screed database.
21 | 
22 | Conversion between sequence file types is provided in the ToFastq and
23 | ToFasta functions.
24 | """
25 | 
26 | from __future__ import absolute_import
27 | 
28 | from screed.openscreed import ScreedDB
29 | from screed.openscreed import Open as open
30 | from screed.conversion import ToFastq
31 | from screed.conversion import ToFasta
32 | from screed.createscreed import create_db, make_db
33 | from screed.seqparse import read_fastq_sequences
34 | from screed.seqparse import read_fasta_sequences
35 | from screed.dna import rc
36 | from screed.screedRecord import Record
37 | 
38 | 
39 | # Resolve the package version: ask the installed distribution's metadata
40 | # first, and fall back to the autogenerated version.py module inside the
41 | # package when no installed distribution is found.
42 | from importlib.metadata import version, PackageNotFoundError
43 | try:
44 |     VERSION = version(__name__)
45 | except PackageNotFoundError:  # pragma: no cover
46 |     try:
47 |         from .version import version as VERSION  # noqa
48 |     except ImportError:  # pragma: no cover
49 |         raise ImportError(
50 |             "Failed to find (autogenerated) version.py. "
51 |             "This might be because you are installing from GitHub's tarballs, "
52 |             "use the PyPI ones."
53 |             )
54 | __version__ = VERSION
55 | 


--------------------------------------------------------------------------------
/screed/__main__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright (c) 2016, The Regents of the University of California.
 4 | 
 5 | from __future__ import absolute_import, print_function
 6 | 
 7 | import argparse
 8 | import sys
 9 | 
10 | from . import createscreed
11 | from . import dump_fasta
12 | from . import dump_fastq
13 | 
14 | 
class ScreedCommands(object):
    """
    Dispatcher for the ``screed`` command line.

    Instantiating the class reads ``sys.argv``: the first argument selects
    a sub-command and the remaining arguments are handed to its handler.
    An unknown sub-command prints the help text and exits with status 1.
    """

    def __init__(self):
        usage_text = '''screed <command> [<args>]

Available:

    db <filename>               Creates a screed database.
    dump_fasta <db> <output>    Convert a screed database to a FASTA file
    dump_fastq <db> <output>    Convert a screed database to a FASTQ file

'''
        parser = argparse.ArgumentParser(description="", usage=usage_text)
        parser.add_argument('command')

        handlers = {
            'db': createscreed.main,
            'dump_fasta': dump_fasta.main,
            'dump_fastq': dump_fastq.main,
        }

        # Only argv[1] is parsed here; everything after it belongs to the
        # selected sub-command and is parsed by that sub-command itself.
        selected = parser.parse_args(sys.argv[1:2]).command
        handler = handlers.get(selected)
        if handler is None:
            print('Unrecognized command')
            parser.print_help()
            sys.exit(1)

        handler(sys.argv[2:])
45 | 
46 | 
def main():
    """
    Entry point for ``python -m screed`` and the ``screed`` script.

    Runs the command dispatcher and returns 0 when it completes normally.
    """
    ScreedCommands()
    return 0


if __name__ == "__main__":
    # Hand main()'s return code to the interpreter instead of dropping it.
    sys.exit(main())
54 | 


--------------------------------------------------------------------------------
/screed/conversion.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2008-2010, Michigan State University.
 2 | 
 3 | from __future__ import absolute_import
 4 | from .openscreed import ScreedDB
 5 | 
 6 | _MAXLINELEN = 80
 7 | _null_quality = '\"'  # ASCII 34, e.g 75% chance of incorrect read
 8 | 
 9 | 
def GetComments(value):
    """
    Returns the 'description' entry of the given dictionary-like record,
    or its 'annotations' entry when there is no description. Returns an
    empty string when neither key is present
    """
    for field in ('description', 'annotations'):
        if field in value:
            return value[field]
    return ''
21 | 
22 | 
def linewrap(longString):
    """
    Splits the given string into consecutive pieces of at most
    _MAXLINELEN characters and joins the pieces with newline characters
    """
    pieces = [longString[start:start + _MAXLINELEN]
              for start in range(0, len(longString), _MAXLINELEN)]
    return '\n'.join(pieces)
35 | 
36 | 
def GenerateQuality(value):
    """
    Returns the line-wrapped 'quality' entry of the record when it has
    one. Otherwise builds a placeholder made of _null_quality repeated
    once per character of the record's sequence, line-wrapped the same way
    """
    if 'quality' not in value:
        placeholder = _null_quality * len(str(value['sequence']))
        return linewrap(placeholder)

    return linewrap(value['quality'])
47 | 
48 | 
def ToFastq(dbFile, outputFile):
    """
    Opens the screed database file and attempts to dump it
    to a FASTQ-formatted text file

    Each record is written as '@name comments', the line-wrapped
    sequence, a '+' line and the line-wrapped quality, encoded as UTF-8.
    Returns the number of records written, which is 0 for an empty
    database. The output file and the database are closed even if
    writing fails part-way.
    """
    written = 0
    # The output file is opened before the database, as before, so the
    # file is created/truncated even when the database cannot be opened.
    with open(outputFile, 'wb') as outFile:
        db = ScreedDB(dbFile)
        try:
            for value in db.itervalues():
                line = '@%s %s\n%s\n+\n%s\n' % (value['name'],
                                                GetComments(value),
                                                linewrap(str(value['sequence'])),
                                                GenerateQuality(value))
                outFile.write(line.encode('UTF-8'))
                written += 1
        finally:
            db.close()

    return written
67 | 
68 | 
def ToFasta(dbFile, outputFile):
    """
    Opens the screed database file and attempts to dump it
    to a FASTA-formatted text file

    Each record is written as '>name comments' followed by the
    line-wrapped sequence, encoded as UTF-8. Returns the number of
    records written, which is 0 for an empty database. The output file
    and the database are closed even if writing fails part-way.
    """
    written = 0
    # The output file is opened before the database, as before, so the
    # file is created/truncated even when the database cannot be opened.
    with open(outputFile, 'wb') as outFile:
        db = ScreedDB(dbFile)
        try:
            for value in db.itervalues():
                line = '>%s %s\n%s\n' % (value['name'], GetComments(value),
                                         linewrap(str(value['sequence'])))
                outFile.write(line.encode('UTF-8'))
                written += 1
        finally:
            db.close()

    return written
86 | 


--------------------------------------------------------------------------------
/screed/createscreed.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2016, The Regents of the University of California.
  2 | 
  3 | from __future__ import absolute_import
  4 | 
  5 | import argparse
  6 | import itertools
  7 | import os
  8 | try:
  9 |     import sqlite3
 10 | except ImportError:
 11 |     pass
 12 | import itertools
 13 | import sys
 14 | 
 15 | from . import DBConstants, fasta, fastq, openscreed
 16 | 
 17 | 
 18 | def create_db(filepath, fields, rcrditer):
 19 |     """
 20 |     Creates a screed database in the given filepath. Fields is a tuple
 21 |     specifying the names and relative order of attributes in a
 22 |     record. rcrditer is an iterator returning records over a
 23 |     sequence dataset. Records yielded are in dictionary form
 24 |     """
 25 |     try:
 26 |         sqlite3
 27 |     except NameError:
 28 |         raise Exception("error: sqlite3 is needed for this functionality" +
 29 |                         " but is not installed.")
 30 | 
 31 |     if not filepath.endswith(DBConstants.fileExtension):
 32 |         filepath += DBConstants.fileExtension
 33 | 
 34 |     if os.path.exists(filepath):  # Remove existing files
 35 |         os.unlink(filepath)
 36 | 
 37 |     con = sqlite3.connect(filepath)
 38 |     cur = con.cursor()
 39 | 
 40 |     # Sqlite PRAGMA settings for speed
 41 |     cur.execute("PRAGMA synchronous='OFF'")
 42 |     cur.execute("PRAGMA locking_mode=EXCLUSIVE")
 43 | 
 44 |     # Create the admin table
 45 |     cur.execute('CREATE TABLE %s (%s INTEGER PRIMARY KEY, '
 46 |                 '%s TEXT, %s TEXT)' % (DBConstants._SCREEDADMIN,
 47 |                                        DBConstants._PRIMARY_KEY,
 48 |                                        DBConstants._FIELDNAME,
 49 |                                        DBConstants._ROLENAME))
 50 |     query = 'INSERT INTO %s (%s, %s) VALUES (?, ?)' % \
 51 |             (DBConstants._SCREEDADMIN, DBConstants._FIELDNAME,
 52 |              DBConstants._ROLENAME)
 53 | 
 54 |     # Put the primary key in as an attribute
 55 |     cur.execute(query, (DBConstants._PRIMARY_KEY,
 56 |                         DBConstants._PRIMARY_KEY_ROLE))
 57 |     for attribute, role in fields:
 58 |         cur.execute(query, (attribute, role))
 59 | 
 60 |     # Setup the dictionary table creation field substring
 61 |     fieldsub = ','.join(['%s TEXT' % field for field, role in fields])
 62 | 
 63 |     # Create the dictionary table
 64 |     cur.execute('CREATE TABLE %s (%s INTEGER PRIMARY KEY, %s)' %
 65 |                 (DBConstants._DICT_TABLE, DBConstants._PRIMARY_KEY,
 66 |                  fieldsub))
 67 | 
 68 |     # Setup the 'qmarks' sqlite substring
 69 |     qmarks = ','.join(['?' for i in range(len(fields))])
 70 | 
 71 |     # Setup the sql substring for inserting fields into database
 72 |     fieldsub = ','.join([fieldname for fieldname, role in fields])
 73 | 
 74 |     query = 'INSERT INTO %s (%s) VALUES (%s)' %\
 75 |             (DBConstants._DICT_TABLE, fieldsub, qmarks)
 76 |     # Pull data from the iterator and store in database
 77 |     # Commiting in batches seems faster than a single call to executemany
 78 |     data = (tuple(record[fieldname] for fieldname, role in fields)
 79 |             for record in rcrditer)
 80 |     while True:
 81 |         batch = list(itertools.islice(data, 10000))
 82 |         if not batch:
 83 |             break
 84 |         cur.executemany(query, batch)
 85 |     con.commit()
 86 | 
 87 |     # Attribute to index
 88 |     queryby = fields[0][0]  # Defaults to the first field
 89 |     for fieldname, role in fields:
 90 |         if role == DBConstants._INDEXED_TEXT_KEY:
 91 |             queryby = fieldname
 92 |             break
 93 | 
 94 |     # Make the index on the 'queryby' attribute
 95 |     cur.execute('CREATE UNIQUE INDEX %sidx ON %s(%s)' %
 96 |                 (queryby, DBConstants._DICT_TABLE, queryby))
 97 | 
 98 |     con.commit()
 99 |     con.close()
100 | 
101 | 
def make_db(filename):
    """
    Builds a screed database from the sequence file 'filename'.

    The file is opened with openscreed.Open (descriptions are parsed),
    and the field layout passed to create_db is chosen from the name of
    the parser that Open selected, FASTQ or FASTA.
    """
    reader = openscreed.Open(filename, parse_description=True)

    layouts = dict(
        (parser.__name__, layout)
        for parser, layout in ((fastq.fastq_iter, fastq.FieldTypes),
                               (fasta.fasta_iter, fasta.FieldTypes)))

    # Create the screed db
    create_db(filename, layouts[reader.iter_fn.__name__], reader)
114 | 
115 | 
def main(args):
    """
    Shell interface to make_db. 'args' is the list of command-line
    arguments (without the program name); it must hold the sequence
    file name.

    Prints where the database was saved and exits with status 0.
    """
    parser = argparse.ArgumentParser(description="A shell interface to the "
                                     "screed database writing function")
    parser.add_argument('filename')
    args = parser.parse_args(args)

    make_db(args.filename)

    # Report the path create_db actually writes to: the extension is only
    # appended when the filename does not already end with it.
    db_path = args.filename
    if not db_path.endswith(DBConstants.fileExtension):
        db_path += DBConstants.fileExtension
    print("Database saved in {}".format(db_path))

    # sys.exit rather than the builtin exit(), which the site module
    # installs and which is therefore not always available.
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])
131 | 


--------------------------------------------------------------------------------
/screed/dna.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016, The Regents of the University of California.
 2 | 
 3 | import array
 4 | import string
 5 | 
 6 | legal_dna = "ACGTN"
 7 | 
 8 | 
 9 | def is_DNA(seq):
10 |     """
11 |     Returns 1 if it contains only legal values for a DNA sequence.
12 | 
13 |     c.f.  http://www.ncbi.nlm.nih.gov/BLAST/fasta.html
14 |     """
15 |     for ch in seq:
16 |         if ch not in legal_dna:
17 |             return 0
18 | 
19 |     return 1
20 | 
21 | 
def reverse_complement(s):
    """
    Build reverse complement of 's'.

    The input is upper-cased first; an assertion fails when the
    upper-cased sequence does not pass is_DNA.
    """
    upper = s.upper()
    assert is_DNA(upper), "Your sequence must be DNA!"

    return complement(reverse(upper))
33 | 
34 | 
35 | rc = reverse_complement                 # alias 'rc' to 'reverse_complement'
36 | 
37 | __complementTranslation = {"A": "T", "C": "G", "G": "C", "T": "A", "N": "N"}
38 | 
39 | 
def complement(s):
    """
    Return complement of 's', translating each element through the
    module's complement table. An element missing from the table raises
    KeyError.
    """
    table = __complementTranslation
    return "".join([table[base] for base in s])
46 | 
47 | 
def reverse(s):
    """
    Return reverse of 's', as a string joined from its elements taken
    last to first.
    """
    flipped = list(reversed(s))
    return "".join(flipped)
55 | 


--------------------------------------------------------------------------------
/screed/dump_fasta.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright (c) 2008, Michigan State University.
 4 | # Copyright (c) 2016, The Regents of the University of California.
 5 | 
 6 | from __future__ import print_function
 7 | 
 8 | import argparse
 9 | import os
10 | import sys
11 | 
12 | from screed import ToFasta
13 | 
14 | 
15 | # Shell interface to the ToFasta screed conversion function
def main(args):
    """
    Shell interface to the ToFasta screed conversion function.

    'args' is the list of command-line arguments (without the program
    name): the database file and, optionally, the output file, which
    defaults to /dev/stdout. Exits with status 1 when the database file
    does not exist; otherwise reports the record count on stderr.
    """
    parser = argparse.ArgumentParser(
        description="Convert a screed database to a FASTA file")
    parser.add_argument('dbfile')
    parser.add_argument('outputfile', default='/dev/stdout', nargs='?')
    args = parser.parse_args(args)

    if not os.path.isfile(args.dbfile):
        print("No such file: %s" % args.dbfile)
        # sys.exit rather than the site-provided exit() builtin
        sys.exit(1)

    n = ToFasta(args.dbfile, args.outputfile)

    sys.stderr.write('Wrote {} records in FASTA format.\n'.format(n))


if __name__ == '__main__':
    # Pass the argument list, not the single string sys.argv[1]
    main(sys.argv[1:])
34 | 


--------------------------------------------------------------------------------
/screed/dump_fastq.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright (c) 2008, Michigan State University.
 4 | # Copyright (c) 2016, The Regents of the University of California.
 5 | 
 6 | from __future__ import print_function
 7 | from screed import ToFastq
 8 | import argparse
 9 | import sys
10 | import os
11 | 
12 | 
13 | # Shell interface to the ToFastq screed conversion function
def main(args):
    """
    Shell interface to the ToFastq screed conversion function.

    'args' is the list of command-line arguments (without the program
    name): the database file and, optionally, the output file, which
    defaults to /dev/stdout. Exits with status 1 when the database file
    does not exist; otherwise reports the record count on stderr.
    """
    parser = argparse.ArgumentParser(
        description="Convert a screed database to a FASTQ file")
    parser.add_argument('dbfile')
    parser.add_argument('outputfile', default='/dev/stdout', nargs='?')
    args = parser.parse_args(args)

    if not os.path.isfile(args.dbfile):
        print("No such file: %s" % args.dbfile)
        # sys.exit rather than the site-provided exit() builtin
        sys.exit(1)

    n = ToFastq(args.dbfile, args.outputfile)

    sys.stderr.write('Wrote {} records in FASTQ format.\n'.format(n))


if __name__ == '__main__':
    # Pass the argument list, not the single string sys.argv[1]
    main(sys.argv[1:])
32 | 


--------------------------------------------------------------------------------
/screed/fasta.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016, The Regents of the University of California.
 2 | 
 3 | from __future__ import absolute_import
 4 | from . import DBConstants
 5 | from .screedRecord import Record
 6 | from .utils import to_str
 7 | 
 8 | FieldTypes = (('name', DBConstants._INDEXED_TEXT_KEY),
 9 |               ('description', DBConstants._STANDARD_TEXT),
10 |               ('sequence', DBConstants._SLICEABLE_TEXT))
11 | 
12 | 
def fasta_iter(handle, parse_description=False, line=None):
    """
    Iterator over the given FASTA file handle, returning records. handle
    is a handle to a file opened for reading

    'line', when given, is used as the first header line instead of
    reading one from handle. With parse_description the header is split
    at its first space into name and description; otherwise the whole
    header is the name. Raises IOError when a record does not start
    with '>'.
    """
    if line is None:
        line = handle.readline()

    while line:
        line = to_str(line.strip())
        if not line.startswith('>'):
            msg = f"Bad FASTA format: no '>' at beginning of line: {line}"
            raise IOError(msg)

        header = line[1:]
        name, description = header, ''
        if parse_description:
            # Without a space the description part comes back empty
            name, _, description = header.partition(' ')

        # Gather sequence lines up to the next header or end of input
        pieces = []
        line = to_str(handle.readline())
        while line and not line.startswith('>'):
            pieces.append(line.strip())
            line = to_str(handle.readline())

        yield Record(name=name.strip(),
                     description=description.strip(),
                     sequence=''.join(pieces))
51 | 


--------------------------------------------------------------------------------
/screed/fastq.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016, The Regents of the University of California.
 2 | 
 3 | from __future__ import absolute_import
 4 | from . import DBConstants
 5 | from .screedRecord import Record
 6 | from .utils import to_str
 7 | 
 8 | FieldTypes = (('name', DBConstants._INDEXED_TEXT_KEY),
 9 |               ('annotations', DBConstants._STANDARD_TEXT),
10 |               ('sequence', DBConstants._STANDARD_TEXT),
11 |               ('quality', DBConstants._STANDARD_TEXT))
12 | 
13 | 
def fastq_iter(handle, line=None, parse_description=False):
    """
    Iterator over the given FASTQ file handle returning records. handle
    is a handle to a file opened for reading

    'line', when given, is used as the first header line instead of
    reading one from handle. With parse_description the header is split
    at its first space into name and annotations; otherwise the whole
    header is the name. Raises IOError when a record does not start
    with '@' or when sequence and quality lengths differ.
    """
    def read_stripped():
        return to_str(handle.readline().strip())

    if line is None:
        line = handle.readline()
    line = to_str(line.strip())

    while line:
        if not line.startswith('@'):
            raise IOError("Bad FASTQ format: no '@' at beginning of line")

        header = line[1:]
        if parse_description:
            # Without a space the annotations part comes back empty
            name, _, annotations = header.partition(' ')
        else:
            name, annotations = header, ''

        # Sequence lines run until the separator line ('+' or '#')
        seq_chunks = []
        line = read_stripped()
        while line and not line.startswith(('+', '#')):
            seq_chunks.append(line)
            line = read_stripped()
        sequence = ''.join(seq_chunks)

        # Quality lines are consumed until they cover the sequence length,
        # so a quality line beginning with '@' is not taken for a header
        qual_chunks = []
        covered = 0
        line = read_stripped()
        while line != '' and covered < len(sequence):
            qual_chunks.append(line)
            covered += len(line)
            line = read_stripped()
        quality = ''.join(qual_chunks)

        if len(sequence) != len(quality):
            raise IOError('sequence and quality strings must be '
                          'of equal length')

        yield Record(name=name, annotations=annotations,
                     sequence=sequence, quality=quality)
65 | 


--------------------------------------------------------------------------------
/screed/hava.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016, The Regents of the University of California.
 2 | 
 3 | from __future__ import absolute_import
 4 | from . import DBConstants
 5 | from .utils import to_str
 6 | 
 7 | FieldTypes = (('hava', DBConstants._INDEXED_TEXT_KEY),
 8 |               ('quarzk', DBConstants._STANDARD_TEXT),
 9 |               ('muchalo', DBConstants._STANDARD_TEXT),
10 |               ('fakours', DBConstants._STANDARD_TEXT),
11 |               ('selimizicka', DBConstants._STANDARD_TEXT),
12 |               ('marshoon', DBConstants._STANDARD_TEXT))
13 | 
14 | 
def hava_iter(handle):
    """
    Iterator over a 'hava' sequence file, returning records. handle
    is a handle to a file opened for reading

    A record is six consecutive stripped lines, stored under the keys
    'hava', 'quarzk', 'muchalo', 'fakours', 'selimizicka' and 'marshoon'.
    Iteration stops at the first empty line where a record would start.
    Every record is a fresh dictionary, so records collected from the
    iterator do not overwrite one another.
    """
    def read_field():
        return to_str(handle.readline().strip())

    line = read_field()
    while line:
        record = {'hava': line}
        for key in ('quarzk', 'muchalo', 'fakours',
                    'selimizicka', 'marshoon'):
            record[key] = read_field()

        # Read the next record's first line before yielding, as before
        line = read_field()
        yield record
32 | 


--------------------------------------------------------------------------------
/screed/openscreed.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2008, Michigan State University.
  2 | """Reader and writer for screed."""
  3 | 
  4 | from __future__ import absolute_import
  5 | 
  6 | import os
  7 | import io
  8 | import sys
  9 | import gzip
 10 | import bz2
 11 | from collections.abc import MutableMapping
 12 | 
 13 | try:
 14 |     import sqlite3
 15 | except ImportError:
 16 |     pass
 17 | 
 18 | from . import DBConstants
 19 | from . import screedRecord
 20 | from .fastq import fastq_iter
 21 | from .fasta import fasta_iter
 22 | from .utils import to_str
 23 | 
 24 | 
 25 | def _normalize_filename(filename):
 26 |     """Map '-' to '/dev/stdin' to handle the usual shortcut."""
 27 |     if filename == '-':
 28 |         filename = '/dev/stdin'
 29 |     return filename
 30 | 
 31 | 
 32 | class Open(object):
 33 |     def __init__(self, filename, *args, **kwargs):
 34 |         self.sequencefile = None
 35 |         self.iter_fn = self.open_reader(filename, *args, **kwargs)
 36 |         if self.iter_fn:
 37 |             self.__name__ = self.iter_fn.__name__
 38 | 
 39 |     def open_reader(self, filename, *args, **kwargs):
 40 |         """
 41 |         Make a best-effort guess as to how to parse the given sequence file.
 42 | 
 43 |         Handles '-' as shortcut for stdin.
 44 |         Deals with .gz, FASTA, and FASTQ records.
 45 |         """
 46 |         magic_dict = {
 47 |             b"\x1f\x8b\x08": "gz",
 48 |             b"\x42\x5a\x68": "bz2",
 49 |             # "\x50\x4b\x03\x04": "zip"
 50 |         }  # Inspired by http://stackoverflow.com/a/13044946/1585509
 51 |         filename = _normalize_filename(filename)
 52 |         bufferedfile = io.open(file=filename, mode='rb', buffering=8192)
 53 |         num_bytes_to_peek = max(len(x) for x in magic_dict)
 54 |         file_start = bufferedfile.peek(num_bytes_to_peek)
 55 |         compression = None
 56 |         for magic, ftype in magic_dict.items():
 57 |             if file_start.startswith(magic):
 58 |                 compression = ftype
 59 |                 break
 60 |         if compression == 'bz2':
 61 |             sequencefile = bz2.BZ2File(filename=bufferedfile)
 62 |             peek = sequencefile.peek(1)
 63 |         elif compression == 'gz':
 64 |             if not bufferedfile.seekable():
 65 |                 bufferedfile.close()
 66 |                 raise ValueError("gziped data not streamable, pipe "
 67 |                                  "through zcat first")
 68 |             peek = gzip.GzipFile(filename=filename).read(1)
 69 |             sequencefile = gzip.GzipFile(filename=filename)
 70 |             bufferedfile.close()
 71 |         else:
 72 |             peek = bufferedfile.peek(1)
 73 |             sequencefile = bufferedfile
 74 | 
 75 |         iter_fn = None
 76 |         try:
 77 |             first_char = peek[0]
 78 |         except IndexError as err:
 79 |             sequencefile.close()
 80 |             return []  # empty file
 81 | 
 82 |         try:
 83 |             first_char = chr(first_char)
 84 |         except TypeError:
 85 |             pass
 86 | 
 87 |         if first_char == '>':
 88 |             iter_fn = fasta_iter
 89 |         elif first_char == '@':
 90 |             iter_fn = fastq_iter
 91 | 
 92 |         if iter_fn is None:
 93 |             sequencefile.close()
 94 |             raise ValueError("unknown file format for '%s'" % filename)
 95 | 
 96 |         self.sequencefile = sequencefile
 97 |         return iter_fn(sequencefile, *args, **kwargs)
 98 | 
 99 |     def __enter__(self):
100 |         return self.iter_fn
101 | 
102 |     def __exit__(self, *exc_info):
103 |         self.close()
104 | 
105 |     def __iter__(self):
106 |         if self.iter_fn:
107 |             return self.iter_fn
108 |         return iter(())
109 | 
110 |     def close(self):
111 |         if self.sequencefile is not None:
112 |             self.sequencefile.close()
113 | 
114 | 
class ScreedDB(MutableMapping):

    """
    Core on-disk dictionary interface for reading screed databases. Accepts a
    path string to a screed database

    The mapping is read-only: every mutating method raises
    NotImplementedError.
    """

    def __init__(self, filepath):
        """
        Opens the screed database at 'filepath'; the screed file extension
        is appended when missing.

        Raises ValueError if the file does not exist, and TypeError if it
        does not hold exactly the dictionary table and the admin table.
        """
        self._db = None
        try:
            sqlite3
        except NameError:
            raise Exception("error: sqlite3 is needed for this " +
                            "functionality, but is not installed.")

        self._filepath = filepath
        if not self._filepath.endswith(DBConstants.fileExtension):
            self._filepath += DBConstants.fileExtension

        if not os.path.exists(self._filepath):
            raise ValueError('No such file: %s' % self._filepath)

        self._db = sqlite3.connect(self._filepath)
        cursor = self._db.cursor()

        # Make sure the database is a prepared screed database
        query = "SELECT name FROM sqlite_master WHERE type='table' "\
                "ORDER BY name"
        res = cursor.execute(query)
        try:
            # Unpacking None (fewer than two tables) raises TypeError too
            dictionary_table, = res.fetchone()
            admin_table, = res.fetchone()

            if dictionary_table != DBConstants._DICT_TABLE:
                raise TypeError
            if admin_table != DBConstants._SCREEDADMIN:
                raise TypeError

        except TypeError:
            self.close()
            raise TypeError("Database %s is not a proper screed database"
                            % self._filepath)

        nothing = res.fetchone()
        if nothing is not None:
            self.close()
            raise TypeError("Database %s has too many tables."
                            % self._filepath)

        # Store the fields of the admin table in a tuple
        query = "SELECT %s, %s FROM %s" % \
            (DBConstants._FIELDNAME,
             DBConstants._ROLENAME,
             DBConstants._SCREEDADMIN)
        res = cursor.execute(query)
        self.fields = tuple([(str(field), role) for field, role in res])

        # Indexed text column for querying, search fields to find.
        # Defaults to the second admin row; the last field carrying the
        # indexed-text role wins.
        self._queryBy = self.fields[1][0]
        for fieldname, role in self.fields:
            if role == DBConstants._INDEXED_TEXT_KEY:
                self._queryBy = fieldname

        # Sqlite PRAGMA settings for speed
        cursor.execute("PRAGMA cache_size=2000")

        # Retrieve the length of the database
        query = 'SELECT MAX(%s) FROM %s' % (DBConstants._PRIMARY_KEY,
                                            DBConstants._DICT_TABLE)
        max_id, = cursor.execute(query).fetchone()
        # MAX() over an empty table is NULL; report that as length 0
        self._len = 0 if max_id is None else max_id

    def __del__(self):
        """
        Alias for close()
        """
        self.close()

    def close(self):
        """
        Closes the sqlite database handle. Safe to call more than once
        """
        if getattr(self, '_db', None) is not None:
            self._db.close()
            self._db = None

    def __getitem__(self, key):
        """
        Retrieves from database the record with the key 'key'.
        Raises KeyError when there is no such record
        """
        cursor = self._db.cursor()
        key = str(key)  # So lazy retrieval objects are evaluated
        query = 'SELECT %s FROM %s WHERE %s=?' % (self._queryBy,
                                                  DBConstants._DICT_TABLE,
                                                  self._queryBy)
        res = cursor.execute(query, (key,))
        if res.fetchone() is None:
            raise KeyError("Key %s not found" % key)
        return screedRecord._buildRecord(self.fields, self._db,
                                         key,
                                         self._queryBy)

    def values(self):
        """
        Retrieves all records from the database and returns them as a list
        """
        return list(self.itervalues())

    def items(self):
        """
        Retrieves all records from the database and returns them as a list
        of tuple pairs, as produced by iteritems()
        """
        return list(self.iteritems())

    def loadRecordByIndex(self, index):
        """
        Retrieves record from database at the given zero-based index.
        Raises KeyError when there is no such record
        """
        cursor = self._db.cursor()
        index = int(index) + 1  # Hack to make indexing start at 0
        query = 'SELECT %s FROM %s WHERE %s=?' % (DBConstants._PRIMARY_KEY,
                                                  DBConstants._DICT_TABLE,
                                                  DBConstants._PRIMARY_KEY)
        res = cursor.execute(query, (index,))
        if res.fetchone() is None:
            raise KeyError("Index %d not found" % index)
        return screedRecord._buildRecord(self.fields, self._db,
                                         index,
                                         DBConstants._PRIMARY_KEY)

    def __len__(self):
        """
        Returns the number of records in the database, taken from the
        largest primary key
        """
        return self._len

    def keys(self):
        """
        Returns a list of keys in the database
        """
        return list(self.iterkeys())

    def __repr__(self):
        """
        Returns a string with some general information about the database
        """
        return "<%s, '%s'>" % (self.__class__.__name__,
                               self._filepath)

    def itervalues(self):
        """
        Iterator over records in the database, by primary key from 1 up
        to the database length
        """
        for index in range(1, self.__len__() + 1):
            yield screedRecord._buildRecord(self.fields, self._db,
                                            index,
                                            DBConstants._PRIMARY_KEY)

    def iterkeys(self):
        """
        Iterator over keys in the database, in primary key order
        """
        cursor = self._db.cursor()
        # Order by the configured primary key column rather than a
        # hard-coded column name
        query = 'SELECT %s FROM %s ORDER BY %s' % (
            self._queryBy, DBConstants._DICT_TABLE,
            DBConstants._PRIMARY_KEY)
        for key, in cursor.execute(query):
            yield key

    def __iter__(self):
        return self.iterkeys()

    def iteritems(self):
        """
        Iterator returning (index, record) pairs, where index is the
        record's primary key entry
        """
        for v in self.itervalues():
            yield v[DBConstants._PRIMARY_KEY], v

    def has_key(self, key):
        """
        Returns true if given key exists in database, false otherwise
        """
        return key in self

    def copy(self):
        """
        Returns this same database object; no new handle is opened
        """
        return self

    def __contains__(self, key):
        """
        Returns true if given key exists in database, false otherwise
        """
        cursor = self._db.cursor()
        query = 'SELECT %s FROM %s WHERE %s = ?' % \
                (self._queryBy, DBConstants._DICT_TABLE, self._queryBy)
        if cursor.execute(query, (key,)).fetchone() is None:
            return False
        return True

    # Here follow the methods that are not implemented. Their signatures
    # accept the arguments the mapping protocol passes, so that using them
    # raises NotImplementedError rather than a TypeError about arguments.

    def __setitem__(self, something, value=None):
        """
        Not implemented (Read-only database)
        """
        raise NotImplementedError

    def __delitem__(self, something):
        """
        Not implemented (Read-only database)
        """
        raise NotImplementedError

    def clear(self):
        """
        Not implemented (Read-only database)
        """
        raise NotImplementedError

    def update(self, something):
        """
        Not implemented (Read-only database)
        """
        raise NotImplementedError

    def setdefault(self, something, default=None):
        """
        Not implemented (Read-only database)
        """
        raise NotImplementedError

    def pop(self, *args):
        """
        Not implemented (Read-only database)
        """
        raise NotImplementedError

    def popitem(self):
        """
        Not implemented (Read-only database)
        """
        raise NotImplementedError
358 | 


--------------------------------------------------------------------------------
/screed/pygr_api.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2008, Michigan State University.
  2 | 
  3 | """
  4 | A simple wrapper implementing a pygr-compatible SequenceDB based on screed.
  5 | 
  6 | There are two implementations:
  7 |  - ScreedSequenceDB
  8 |  - ScreedSequenceDB_ByIndex
  9 | 
 10 | ScreedSequenceDB uses the sequence name as the sequence ID, which
 11 | mimics the behavior of pygr's SequenceFileDB and is good for
 12 | small-to-medium sized collections of sequences.
 13 | ScreedSequenceDB_ByIndex uses the sequence's index (0...size of
 14 | database) as a sequence ID, rather than the sequence name; this is
 15 | much faster for databases with many, many sequences.
 16 | 
 17 | Unlike the normal seqdb, screed will load the entire sequence record
 18 | into memory on request, so it's not good for large sequences.
 19 | 
 20 | All screed records are guaranteed to have an 'index', a 'name', and a
 21 | 'sequence' attribute; anything else is specific to the database writer
 22 | you use.  The raw screed record (which contains any other information)
 23 | is available under seqObj.record.
 24 | 
 25 | Note: the underlying screed database must already have been built with
 26 | fadbm or fqdbm.
 27 | 
 28 | CTB 3/20/09
 29 | """
 30 | 
 31 | import UserDict
 32 | 
 33 | from screed import ScreedDB
 34 | 
 35 | from pygr.sequence import SequenceBase
 36 | from pygr.seqdb import SequenceDB
 37 | from pygr.sequtil import DNA_SEQTYPE
 38 | 
 39 | #
 40 | 
 41 | 
 42 | class ScreedSequence(SequenceBase):
 43 | 
 44 |     """Sequence implementation based on screed; stores screed record info.
 45 | 
 46 |     Attributes:
 47 |       - 'id' and 'db' are the standard pygr-ish name/database attrs.
 48 |       - 'record' is the screed 'record' object, containing name, etc.
 49 |       - 'name' is the record name, which can be the same as 'id' but
 50 |         can also be different (see ScreedSequenceDB_ByIndex).
 51 |       - 'seq' is the sequence.
 52 | 
 53 |     """
 54 | 
 55 |     def __init__(self, db, id):
 56 |         self.id = id
 57 |         SequenceBase.__init__(self)
 58 |         info = db.seqInfoDict[id]
 59 | 
 60 |         self.record = info.record
 61 |         self.name = info.record.name
 62 |         self.seq = info.record.sequence
 63 | 
 64 | 
 65 | class ScreedSequenceDB(SequenceDB):
 66 | 
 67 |     """SequenceDB implementation based on screed; retrieve seqs by name."""
 68 |     itemClass = ScreedSequence
 69 | 
 70 |     def __init__(self, filepath):
 71 |         self.filepath = filepath
 72 |         self.seqInfoDict = _ScreedSeqInfoDict_ByName(filepath)
 73 |         SequenceDB.__init__(self)
 74 | 
 75 |     def _set_seqtype(self):
 76 |         self._seqtype = DNA_SEQTYPE
 77 | 
 78 |     def __repr__(self):
 79 |         return "<%s '%s'>" % (self.__class__.__name__, self.filepath)
 80 | 
 81 |     # override inherited __reduce__/__getstate__/__setstate__ from SequenceDB.
 82 |     def __reduce__(self):
 83 |         return (ScreedSequenceDB, (self.filepath,))
 84 | 
 85 | 
 86 | class ScreedSequenceDB_ByIndex(SequenceDB):
 87 | 
 88 |     """SequenceDB implementation based on screed; retrieve seqs by index."""
 89 |     itemClass = ScreedSequence
 90 | 
 91 |     def __init__(self, filepath):
 92 |         self.filepath = filepath
 93 |         self.seqInfoDict = _ScreedSeqInfoDict_ByIndex(filepath)
 94 |         SequenceDB.__init__(self)
 95 | 
 96 |     def _set_seqtype(self):
 97 |         self._seqtype = DNA_SEQTYPE
 98 | 
 99 |     def __repr__(self):
100 |         return "<%s '%s'>" % (self.__class__.__name__, self.filepath)
101 | 
102 |     # override inherited __reduce__/__getstate__/__setstate__ from SequenceDB.
103 |     def __reduce__(self):
104 |         return (ScreedSequenceDB_ByIndex, (self.filepath,))
105 | 
106 | 
107 | class _ScreedSequenceInfo(object):
108 | 
109 |     """Objects to put in seqInfoDict values, for holding screed record info."""
110 | 
111 |     def __init__(self, id, record):
112 |         self.id = id
113 |         self.record = record
114 |         self.length = len(record.sequence)
115 | 
116 | 
class _ScreedSeqInfoDict_ByName(object, UserDict.DictMixin):

    """seqInfoDict whose keys are record names."""

    def __init__(self, filepath):
        """Open the screed database at 'filepath'."""
        self.sdb = ScreedDB(filepath)

    def __getitem__(self, k):
        """Look the record up by name and wrap it, keeping 'k' as its id."""
        record = self.sdb[k]
        return _ScreedSequenceInfo(k, record)

    def keys(self):
        """Return the keys of the underlying screed database."""
        return self.sdb.keys()

    def itervalues(self):
        """Yield one info object per record, walking indices 0..len-1."""
        total = len(self.sdb)
        position = 0
        while position < total:
            record = self.sdb.loadRecordByIndex(position)
            yield _ScreedSequenceInfo(record.name, record)
            position += 1

    def iteritems(self):
        """Yield (record name, info) pairs."""
        for info in self.itervalues():
            yield info.record.name, info
142 | 
143 | 
class _ScreedSeqInfoDict_ByIndex(object, UserDict.DictMixin):

    """seqInfoDict whose keys are record indices."""

    def __init__(self, filepath):
        """Open the screed database at 'filepath'."""
        self.sdb = ScreedDB(filepath)

    def __getitem__(self, k):
        """Load the record at index int(k); the info keeps 'k' itself as
        its id, unconverted."""
        record = self.sdb.loadRecordByIndex(int(k))
        return _ScreedSequenceInfo(k, record)

    def keys(self):
        """Return the indices 0..len-1."""
        return range(len(self.sdb))

    def iterkeys(self):
        """Yield the indices 0..len-1 one at a time."""
        total = len(self.sdb)
        position = 0
        while position < total:
            yield position
            position += 1
165 | 
166 | 
if __name__ == '__main__':
    import sys
    filename = sys.argv[1]

    # Dump every sequence twice: first keyed by name, then keyed by index.
    for db_class in (ScreedSequenceDB, ScreedSequenceDB_ByIndex):
        db = db_class(filename)
        for k in db:
            print(k, repr(db[k]), db[k].name)
178 | 


--------------------------------------------------------------------------------
/screed/screedRecord.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2016, The Regents of the University of California.
  2 | 
  3 | from __future__ import absolute_import
  4 | from functools import total_ordering
  5 | import types
  6 | from . import DBConstants
  7 | import gzip
  8 | import bz2
  9 | from io import BytesIO
 10 | from collections.abc import MutableMapping
 11 | 
 12 | 
 13 | class Record(MutableMapping):
 14 |     """
 15 |     Simple dict-like record interface with bag behavior.
 16 |     """
 17 | 
 18 |     def __init__(self, name=None, sequence=None, **kwargs):
 19 |         d = dict()
 20 |         if name is not None:
 21 |             d['name'] = name
 22 |         if sequence is not None:
 23 |             d['sequence'] = sequence
 24 | 
 25 |         d.update(kwargs)
 26 | 
 27 |         if 'quality' in d and d['quality'] is None:
 28 |             del d['quality']
 29 |         self.d = d
 30 | 
 31 |     def __setitem__(self, name, value):
 32 |         self.d[name] = value
 33 | 
 34 |     def __getattr__(self, name):
 35 |         try:
 36 |             return self.d[name]
 37 |         except KeyError:
 38 |             raise AttributeError(name)
 39 | 
 40 |     def __len__(self):
 41 |         return len(self.sequence)
 42 | 
 43 |     def keys(self):
 44 |         return self.d.keys()
 45 | 
 46 |     def __getitem__(self, idx):
 47 |         if isinstance(idx, slice):
 48 |             trimmed = dict(self.d)
 49 |             trimmed['sequence'] = trimmed['sequence'][idx]
 50 |             if 'quality' in trimmed:
 51 |                 trimmed['quality'] = trimmed['quality'][idx]
 52 |             return Record(**trimmed)
 53 |         return self.d[idx]
 54 | 
 55 |     def __delitem__(self, key):
 56 |         del self.d[key]
 57 | 
 58 |     def __iter__(self):
 59 |         return iter(self.d)
 60 | 
 61 |     def __repr__(self):
 62 |         return repr(self.d)
 63 | 
 64 | 
 65 | @total_ordering
 66 | class _screed_attr(object):
 67 | 
 68 |     """
 69 |     Sliceable database object that supports lazy retrieval
 70 |     """
 71 | 
 72 |     def __init__(self, dbObj, attrName, rowName, queryBy):
 73 |         """
 74 |         Initializes database object with specific record retrieval
 75 |         information
 76 |         dbOjb = database handle
 77 |         attrName = name of attr in db
 78 |         rowName = index/name of row
 79 |         queryBy = by name or index
 80 |         """
 81 |         self._dbObj = dbObj
 82 |         self._attrName = attrName
 83 |         self._rowName = rowName
 84 |         self._queryBy = queryBy
 85 | 
 86 |     def __getitem__(self, sliceObj):
 87 |         """
 88 |         Slicing interface. Returns the slice range given.
 89 |         *.start + 1 to be compatible with sqlite's 1 not 0 scheme
 90 |         """
 91 |         if not isinstance(sliceObj, slice):
 92 |             raise TypeError('__getitem__ argument must be of slice type')
 93 |         if not sliceObj.start <= sliceObj.stop:  # String reverse in future?
 94 |             raise ValueError('start must be less than stop in slice object')
 95 |         length = sliceObj.stop - sliceObj.start
 96 | 
 97 |         query = 'SELECT substr(%s, %d, %d) FROM %s WHERE %s = ?' \
 98 |                 % (self._attrName, sliceObj.start + 1, length,
 99 |                    DBConstants._DICT_TABLE,
100 |                    self._queryBy)
101 |         cur = self._dbObj.cursor()
102 |         result = cur.execute(query, (str(self._rowName),))
103 |         try:
104 |             subStr, = result.fetchone()
105 |         except TypeError:
106 |             raise KeyError("Key %s not found" % self._rowName)
107 |         return str(subStr)
108 | 
109 |     def __len__(self):
110 |         """
111 |         Returns the length of the string
112 |         """
113 |         return len(self.__str__())
114 | 
115 |     def __repr__(self):
116 |         """
117 |         Prints out the name of the class and the name of the sliceable attr
118 |         """
119 |         return "<%s '%s'>" % (self.__class__.__name__, self._attrName)
120 | 
121 |     def __eq__(self, given):
122 |         """
123 |         Compares attribute to given object in string form
124 |         """
125 |         if isinstance(given, bytes):
126 |             return given == self.__str__()
127 |         else:
128 |             return str(given) == self.__str__()
129 | 
130 |     def __lt__(self, given):
131 |         if isinstance(given, bytes):
132 |             return self.__str__() < given
133 |         else:
134 |             return self.__str__() < str(given)
135 | 
136 |     def __str__(self):
137 |         """
138 |         Returns the full attribute as a string
139 |         """
140 |         query = 'SELECT %s FROM %s WHERE %s = ?' \
141 |                 % (self._attrName, DBConstants._DICT_TABLE, self._queryBy)
142 |         cur = self._dbObj.cursor()
143 |         result = cur.execute(query, (str(self._rowName),))
144 |         try:
145 |             record, = result.fetchone()
146 |         except TypeError:
147 |             raise KeyError("Key %s not found" % self._rowName)
148 |         return str(record)
149 | 
150 | 
def _buildRecord(fieldTuple, dbObj, rowName, queryBy):
    """
    Assembles the Record for one database row.

    Fields whose role is sliceable text become lazy _screed_attr objects;
    all other fields are fetched now, in one query, and stored as strings.
    The primary-key field is stored as an int reduced by one so that
    indexing starts at 0.
    """

    # Split the fields into lazy wrappers and names to fetch immediately
    lazy_pairs = []
    eager_names = []
    for fieldname, role in fieldTuple:
        if role != DBConstants._SLICEABLE_TEXT:
            eager_names.append(fieldname)
            continue
        lazy_pairs.append(
            (fieldname, _screed_attr(dbObj, fieldname, rowName, queryBy)))

    # One query for every eagerly loaded column of this row
    query = 'SELECT %s FROM %s WHERE %s=?' % \
            (','.join(eager_names), DBConstants._DICT_TABLE, queryBy)
    row = dbObj.cursor().execute(query, (rowName,)).fetchone()
    eager_pairs = list(zip(eager_names, [str(column) for column in row]))

    # Lazy fields first, then the fetched ones
    fields = {}
    for key, value in lazy_pairs + eager_pairs:
        if key == DBConstants._PRIMARY_KEY:
            # Hack to make indexing start at 0
            value = int(value) - 1
        fields[key] = value

    return Record(**fields)
189 | 
190 | 
def write_fastx(record, fileobj):
    """
    Write sequence record to 'fileobj' in FASTA/FASTQ format.

    The record is written as FASTQ when it has a 'quality' attribute and
    as FASTA otherwise; a 'description' attribute, when present, follows
    the name on the header line. Output is UTF-8 encoded bytes.

    Raises AttributeError unless 'fileobj' is a BytesIO instance or has a
    'mode' attribute equal to "wb".
    """
    isbytesio = isinstance(fileobj, BytesIO)
    iswb = hasattr(fileobj, 'mode') and fileobj.mode == 'wb'
    outputvalid = isbytesio or iswb
    if not outputvalid:
        message = ('cannot call "write_fastx" on object, must be of a file '
                   'handle with mode "wb" or an instance of "BytesIO"')
        raise AttributeError(message)

    defline = record.name
    if hasattr(record, 'description'):
        defline += ' ' + record.description

    if hasattr(record, 'quality'):
        recstr = '@{defline}\n{sequence}\n+\n{quality}\n'.format(
            defline=defline,
            sequence=record.sequence,
            quality=record.quality)
    else:
        recstr = '>{defline}\n{sequence}\n'.format(
            defline=defline,
            sequence=record.sequence)

    fileobj.write(recstr.encode('utf-8'))


def write_fastx_pair(read1, read2, fileobj):
    """
    Write a pair of sequence records to 'fileobj' in FASTA/FASTQ format.

    Each read is written with write_fastx, read1 first. When read1 has a
    'quality' attribute, read2 is asserted to have one as well.
    """
    if hasattr(read1, 'quality'):
        assert hasattr(read2, 'quality')
    # write_fastx is the record writer defined in this module
    write_fastx(read1, fileobj)
    write_fastx(read2, fileobj)
224 | 


--------------------------------------------------------------------------------
/screed/seqparse.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2008, Michigan State University.
 2 | 
 3 | """
 4 | seqparse contains custom sequence parsers for extending screed's
 5 | functionality to arbitrary sequence formats. An example 'hava'
 6 | parser is included for API reference
 7 | """
 8 | 
 9 | from __future__ import absolute_import
10 | 
11 | import os
12 | 
13 | from .createscreed import create_db
14 | from .openscreed import ScreedDB
15 | from . import openscreed
16 | from . import fastq
17 | from . import fasta
18 | from . import hava
19 | 
20 | # [AN] these functions look strangely similar
21 | 
22 | 
def read_fastq_sequences(filename):
    """
    Builds a screed database from the FASTQ file at 'filename' and returns
    a ScreedDB opened on it
    """
    # Opening raises an exception if the file doesn't exist
    records = openscreed.Open(filename, parse_description=True)

    # Populate the database using the FASTQ field layout
    create_db(filename, fastq.FieldTypes, records)

    screed_db = ScreedDB(filename)
    return screed_db
34 | 
35 | 
def read_fasta_sequences(filename):
    """
    Builds a screed database from the FASTA file at 'filename' and returns
    a ScreedDB opened on it
    """
    # Opening raises an exception if the file doesn't exist
    records = openscreed.Open(filename, parse_description=True)

    # Populate the database using the FASTA field layout
    create_db(filename, fasta.FieldTypes, records)

    screed_db = ScreedDB(filename)
    return screed_db
47 | 
48 | 
def read_hava_sequences(filename):
    """
    Function to parse text from the given HAVA file into a screed database

    Returns a ScreedDB opened on the newly created database.
    """
    # Will raise an exception if the file doesn't exist. The 'with' block
    # closes the file even when parsing or database creation fails.
    with open(filename, "rb") as theFile:
        # Setup the iterator function
        iterfunc = hava.hava_iter(theFile)

        # Create the screed db
        create_db(filename, hava.FieldTypes, iterfunc)

    return ScreedDB(filename)
64 | 


--------------------------------------------------------------------------------
/screed/tests/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | 


--------------------------------------------------------------------------------
/screed/tests/havaGen.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | havaGen is for generating sequence files of the imaginary type 'hava'.
  5 | These files consist of attributes in the following newline separated order
  6 | hava
  7 | quarzk
  8 | muchalo
  9 | fakours
 10 | selimizicka
 11 | marshoon
 12 | 
 13 | Since this 'sequence' has absolutely no utility outside of screed, its only
 14 | purpose is to make sure screed can work with arbitrary fields when running
 15 | the tests.
 16 | 
 17 | This is a work of fiction. Names are the product of the author's imagination
 18 | and any resemblance to real life is entirely coincidental.
 19 | """
 20 | from __future__ import print_function
 21 | 
 22 | import sys
 23 | import os
 24 | import random
 25 | 
 26 | 
 27 | class collectionOFiles(object):
 28 | 
 29 |     def __init__(self, baseName, divisions, totalSize):
 30 |         self.baseName = baseName
 31 |         self.divisions = divisions
 32 |         self.totalSize = totalSize
 33 | 
 34 |         self.fileHandles = {}
 35 |         for i in range(0, divisions):
 36 |             filename = self.baseName + "_%d" % i
 37 |             fh = open(filename, "wb")
 38 |             divisor = i * 2
 39 |             if divisor == 0:
 40 |                 divisor = 1
 41 |             self.fileHandles[filename] = (fh, self.totalSize / divisor, 0)
 42 | 
 43 |     def writeRecord(self, hava, quarzk, muchalo, fakours, selimizicka,
 44 |                     marshoon):
 45 |         toRemove = []
 46 |         for filename in self.fileHandles:
 47 |             file, limit, count = self.fileHandles[filename]
 48 |             file.write("%s\n%s\n%s\n%s\n%s\n%s\n" %
 49 |                        (hava, quarzk, muchalo, fakours, selimizicka, marshoon))
 50 |             count += 1
 51 |             if count >= limit:
 52 |                 file.close()
 53 |                 toRemove.append(filename)
 54 |             else:
 55 |                 self.fileHandles[filename] = (file, limit, count)
 56 | 
 57 |         for fh in toRemove:
 58 |             self.fileHandles.pop(fh)
 59 | 
 60 |     def finished(self):
 61 |         return len(self.fileHandles) == 0
 62 | 
 63 | 
 64 | def genString(length, allowedChars):
 65 |     res = []
 66 |     for i in range(0, length):
 67 |         char = allowedChars[random.randint(0, len(allowedChars) - 1)]
 68 |         res.append(char)
 69 |     return "".join(res)
 70 | 
 71 | 
 72 | def createHavaFiles(filename, size, divisions):
 73 |     cof = collectionOFiles(filename, divisions, size)
 74 |     counter = 0
 75 |     lenString = 80
 76 |     allowedQuarzk = ['A', 'T', 'C', 'G']
 77 |     allowedMuchalo = "A B C D E F G H I J K L M N O P".split(' ')
 78 |     allowedFakours = "1 2 3 4 5 6 7 8 9".split(' ')
 79 |     allowedSelimizicka = ["b"]
 80 |     allowedMarshoon = "A 1 B 2 C 3 D 4 E 5 G 6 F 7".split(' ')
 81 |     while not cof.finished():
 82 |         hava = "test_00%d" % counter
 83 |         quarzk = genString(lenString, allowedQuarzk)
 84 |         muchalo = genString(lenString, allowedMuchalo)
 85 |         fakours = genString(lenString, allowedFakours)
 86 |         selimizicka = genString(lenString, allowedSelimizicka)
 87 |         marshoon = genString(lenString, allowedMarshoon)
 88 |         cof.writeRecord(hava, quarzk, muchalo, fakours, selimizicka, marshoon)
 89 |         counter += 1
 90 |     return
 91 | 
 92 | 
 93 | if __name__ == '__main__':
 94 |     if len(sys.argv) != 4:
 95 |         print("Usage: <filename> <size> <divisions>")
 96 |         exit(1)
 97 | 
 98 |     filename = sys.argv[1]
 99 |     size = int(sys.argv[2])
100 |     divisions = int(sys.argv[3])
101 | 
102 |     createHavaFiles(filename, size, divisions)
103 | 


--------------------------------------------------------------------------------
/screed/tests/screed_tst_utils.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is part of screed, http://github.com/dib-lab/screed/, and is
 3 | # Copyright (C) Michigan State University, 2009-2015. It is licensed under
 4 | # the three-clause BSD license; see doc/LICENSE.txt.
 5 | # Contact: khmer-project@idyll.org
 6 | #
 7 | # This file has been modified from the khmer project at
 8 | # https://github.com/dib-lab/khmer/blob/a8356b7abbebf8540c7656378b1459442b781f87/tests/khmer_tst_utils.py
 9 | #
10 | 
11 | import tempfile
12 | import os
13 | import shutil
14 | from io import StringIO
15 | import sys
16 | import traceback
17 | 
18 | from importlib import resources
19 | 
20 | # Remove when we drop support for 3.8
21 | if sys.version_info < (3, 9):
22 |     import importlib_resources as resources
23 | 
24 | 
def get_test_data(filename):
    """
    Returns a path to the named test data file.

    The packaged location is tried first; when nothing exists there, the
    path under the 'test-data' directory beside this module is returned
    instead, whether or not that file exists.

    NOTE(review): the packaged lookup is 'screed'/'tests'/<filename> under
    the 'screed' package resources, with no 'test-data' component --
    confirm that is the intended location.
    """
    candidate = resources.files('screed') / 'screed' / 'tests' / filename
    if candidate.exists() and os.path.isfile(candidate):
        return candidate
    return os.path.join(os.path.dirname(__file__), 'test-data', filename)
31 | 
32 | 
# Temporary directories created by get_temp_filename, removed by cleanup()
cleanup_list = []


def get_temp_filename(filename, tempdir=None):
    """
    Returns 'filename' joined onto a temporary directory.

    When no directory is supplied, a fresh one prefixed 'screedtest_' is
    created and registered in cleanup_list; a supplied directory is used
    as given and is not registered.
    """
    if tempdir is not None:
        return os.path.join(tempdir, filename)

    fresh_dir = tempfile.mkdtemp(prefix='screedtest_')
    cleanup_list.append(fresh_dir)
    return os.path.join(fresh_dir, filename)


def cleanup():
    """
    Deletes every registered temporary directory, ignoring errors, and
    then starts over with a new empty registry.
    """
    global cleanup_list

    for directory in cleanup_list:
        shutil.rmtree(directory, ignore_errors=True)
    cleanup_list = []
50 | 


--------------------------------------------------------------------------------
/screed/tests/test-data/empty.fa:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dib-lab/screed/f290cadb01112510a7aa8f3942238ac617b6d4da/screed/tests/test-data/empty.fa


--------------------------------------------------------------------------------
/screed/tests/test-data/test-whitespace.fa:
--------------------------------------------------------------------------------
 1 | >ENSMICT00000012722 cdna:pseudogene scaffold:micMur1:scaffold_185008:9:424:1 gene:ENSMICG00000012730
 2 | TGCAGAAAATATCAAGAGTCAGCAGAAAAACTATACAAGGGCTGGTATTTTGATTATTCT
 3 | ATAAAAATTCACTTTTTGCTCAGTGTCTTTCATCTGGGCCTGGCCTCCTCTCTTGCAAGC
 4 | CCTGGATTCATAACATCTATAATAATTTTTATATGTGGTAGAGTAATATTAGCTGATTCC
 5 | TTTGCCTCCTGTTCCTTCCCCTCATTCAGGCAGCTGGCCAGGTTTGTGCTCCTTATCTCG
 6 | CAGAAGAGATGTGATAGCAGGCAGAGAATTAAAGTCTTCCTGGCTTTTGGTTTCAGAAGC
 7 | TGCCTTGGGAAGGAAGCAAACAAACATGCCACAGATAAAATATTTGAAAGAAAAGATAAT
 8 | GAAAGTAGAAAAGGGTTCCCTGTTCTTGTGGGGAGGAAGTGA
 9 | 
10 | >ENSMICT00000012401 cdna:novel scaffold:micMur1:scaffold_184912:461:550:-1 gene:ENSMICG00000012409
11 | GAACAGTCTCCTTTGGTTTGTGAAAAGAAACAAAAGAGTGTGGGGGTCGGGGAGCTCATC
12 | CAGCACTTCGTCGATTTCATGACCAACCAG
13 | 


--------------------------------------------------------------------------------
/screed/tests/test-data/test.fa.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dib-lab/screed/f290cadb01112510a7aa8f3942238ac617b6d4da/screed/tests/test-data/test.fa.bz2


--------------------------------------------------------------------------------
/screed/tests/test-data/test.fa.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dib-lab/screed/f290cadb01112510a7aa8f3942238ac617b6d4da/screed/tests/test-data/test.fa.gz


--------------------------------------------------------------------------------
/screed/tests/test-data/test.fa.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dib-lab/screed/f290cadb01112510a7aa8f3942238ac617b6d4da/screed/tests/test-data/test.fa.zip


--------------------------------------------------------------------------------
/screed/tests/test-data/test.fastq.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dib-lab/screed/f290cadb01112510a7aa8f3942238ac617b6d4da/screed/tests/test-data/test.fastq.bz2


--------------------------------------------------------------------------------
/screed/tests/test-data/test.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dib-lab/screed/f290cadb01112510a7aa8f3942238ac617b6d4da/screed/tests/test-data/test.fastq.gz


--------------------------------------------------------------------------------
/screed/tests/test_attriberror.py:
--------------------------------------------------------------------------------
 1 | import screed
 2 | from screed.DBConstants import fileExtension
 3 | import os
 4 | from . import screed_tst_utils as utils
 5 | import shutil
 6 | 
 7 | 
 8 | class nostring:
 9 |     def __str__(self):
10 |         return ""
11 | 
12 |     def __repr__(self):
13 |         return ""
14 | 
15 | 
class test_comparisons():

    """
    Compares each record's sequence attribute against an object whose
    string form is empty, using every rich-comparison operator.
    """

    def setup_method(self):
        # Work on a private copy of test.fa and build its screed db
        self._testfile = utils.get_temp_filename('test.fa')
        shutil.copy(utils.get_test_data('test.fa'), self._testfile)
        screed.read_fasta_sequences(self._testfile)

        self._db = screed.ScreedDB(self._testfile)
        self._ns = nostring()

    def test_eq(self):
        for k in self._db:
            record = self._db.get(k)
            res = (record.sequence == self._ns)
            assert res is False, res

    def test_neq(self):
        for k in self._db:
            record = self._db.get(k)
            res = (record.sequence != self._ns)
            assert res is True, res

    def test_comp_greateq(self):
        for k in self._db:
            record = self._db.get(k)
            res = (record.sequence >= self._ns)
            assert res is True, res

    def test_comp_lesseq(self):
        for k in self._db:
            record = self._db.get(k)
            res = (record.sequence <= self._ns)
            assert res is False, res

    def test_comp_less(self):
        for k in self._db:
            record = self._db.get(k)
            res = (record.sequence < self._ns)
            assert res is False, res

    def test_comp_great(self):
        for k in self._db:
            record = self._db.get(k)
            res = (record.sequence > self._ns)
            assert res is True, res

    def teardown_method(self):
        # Named to pair with setup_method: a plain 'teardown' is a
        # nose-style hook that current pytest no longer calls, which would
        # leave the database file behind.
        os.unlink(self._testfile + fileExtension)
64 | 


--------------------------------------------------------------------------------
/screed/tests/test_convert.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from . import test_fasta
 3 | import os
 4 | import screed
 5 | from screed.DBConstants import fileExtension
 6 | from . import screed_tst_utils as utils
 7 | import shutil
 8 | 
 9 | 
class Test_fasta_to_fastq(test_fasta.Test_fasta):

    """
    Round-trips the fasta test data through fastq and back: fasta db ->
    fastq file -> fastq db -> fasta file -> fasta db, then runs the
    inherited fasta suite against the final database
    """

    def setup_method(self):
        fq_name = utils.get_temp_filename('fa_to_fq')
        fa_name = utils.get_temp_filename('fq_to_fa')
        test_fa = utils.get_temp_filename('test.fa')
        self._fqName, self._faName, self._testfa = fq_name, fa_name, test_fa
        shutil.copy(utils.get_test_data('test.fa'), test_fa)

        screed.read_fasta_sequences(test_fa)  # Fasta file -> fasta db
        screed.ToFastq(test_fa, fq_name)  # Fasta db -> fastq text
        screed.read_fastq_sequences(fq_name)  # Fastq file -> fastq db
        screed.ToFasta(fq_name, fa_name)  # Fastq db -> fasta text
        screed.read_fasta_sequences(fa_name)  # Fasta file -> fasta db
        self.db = screed.ScreedDB(fa_name)

    def teardown_method(self):
        # Remove the converted files and every database built in setup
        leftovers = (
            self._fqName,
            self._fqName + fileExtension,
            self._faName,
            self._faName + fileExtension,
            self._testfa + fileExtension,
        )
        for path in leftovers:
            os.unlink(path)
38 | 


--------------------------------------------------------------------------------
/screed/tests/test_db.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | import screed
 5 | from screed.DBConstants import fileExtension
 6 | from . import screed_tst_utils as utils
 7 | 
 8 | 
 9 | def test_make_db():
10 |     _testfa = utils.get_temp_filename('test.fa')
11 |     shutil.copy(utils.get_test_data('test.fa'), _testfa)
12 |     screed.make_db(_testfa)
13 | 
14 |     db = screed.ScreedDB(_testfa)
15 | 
16 |     os.unlink(_testfa + fileExtension)
17 | 
18 | 
def test_no_sqlite_openscreed():
    """
    With the sqlite3 name removed from screed.openscreed, opening a
    ScreedDB is expected to fail with an error mentioning
    'sqlite3 is needed'. The name is restored afterwards in every case.
    """
    import screed.openscreed

    saveme = screed.openscreed.sqlite3
    del screed.openscreed.sqlite3

    try:
        try:
            screed.openscreed.ScreedDB('xxx')
        except Exception as e:
            assert 'sqlite3 is needed' in str(e)
        else:
            # Without this branch the test would pass when nothing fails
            raise AssertionError('ScreedDB was created without sqlite3')
    finally:
        screed.openscreed.sqlite3 = saveme
32 | 
33 | 
34 | def test_no_sqlite_createscreed():
35 |     import screed.createscreed
36 | 
37 |     saveme = screed.createscreed.sqlite3
38 |     del screed.createscreed.sqlite3
39 | 
40 |     try:
41 |         try:
42 |             screed.createscreed.create_db(None, None, None)
43 |         except Exception as e:
44 |             assert 'sqlite3 is needed' in str(e)
45 |     finally:
46 |         screed.createscreed.sqlite3 = saveme
47 | 
48 | 
49 | def test_nodb():
50 |     """
51 |     Tests if screed throws an appropriate exception if it is
52 |     asked to open a non-existant screed database
53 |     """
54 |     try:
55 |         db = screed.ScreedDB('foo')
56 |         assert 1 == 0  # Previous line should throw an error
57 |     except ValueError:
58 |         pass
59 | 
60 | 
61 | def test_wrongdb():
62 |     """
63 |     Tests if screed throws an appropriate exception if it is
64 |     asked to open a file that isn't a screed database
65 |     """
66 |     try:
67 |         blah = 'blah_screed'
68 |         blah_file = open(blah, 'wb')
69 |         blah_file.close()
70 | 
71 |         db = screed.ScreedDB(blah)
72 |         os.unlink(blah)
73 |         assert 1 == 0
74 |     except TypeError:
75 |         os.unlink(blah)
76 |         pass
77 | 


--------------------------------------------------------------------------------
/screed/tests/test_dictionary.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import os
  3 | import screed
  4 | from screed.DBConstants import fileExtension
  5 | from . import screed_tst_utils as utils
  6 | import shutil
  7 | 
  8 | 
  9 | class Test_dict_methods(object):
 10 | 
 11 |     """
 12 |     Make sure that screed returns sensible results for standard dictionary
 13 |     queries.
 14 |     """
 15 | 
 16 |     def setup_method(self):
 17 |         self._testfa = utils.get_temp_filename('test.fa')
 18 |         shutil.copy(utils.get_test_data('test.fa'), self._testfa)
 19 | 
 20 |         screed.read_fasta_sequences(self._testfa)
 21 |         self.db = screed.ScreedDB(self._testfa)
 22 | 
 23 |     def teardown_method(self):
 24 |         os.unlink(self._testfa + fileExtension)
 25 | 
 26 |     def test_iter_stuff(self):
 27 |         db = self.db
 28 |         keys = db.keys()
 29 |         ikeys = list(db.iterkeys())
 30 |         assert all(key in ikeys for key in keys)
 31 | 
 32 |         values = db.values()
 33 |         ivalues = list(db.itervalues())
 34 |         assert all(value in ivalues for value in values)
 35 | 
 36 |         items = db.items()
 37 |         iitems = list(db.iteritems())
 38 |         assert all(item in iitems for item in items)
 39 | 
 40 |     def test_contains(self):
 41 |         for k in self.db:
 42 |             assert k in self.db
 43 | 
 44 |         assert db.get('FOO') is None
 45 | 
 46 |         assert 'FOO' not in self.db
 47 | 
 48 |     def test_contains(self):
 49 |         for k in self.db:
 50 |             assert k in self.db
 51 | 
 52 |         assert 'FOO' not in self.db
 53 | 
 54 |     def test_get(self):
 55 |         for k in self.db:
 56 |             record = self.db.get(k)
 57 |             assert record.name == k
 58 | 
 59 |             record = self.db[k]
 60 |             assert record.name == k
 61 | 
 62 |         try:
 63 |             self.db['FOO']
 64 |             assert False, "the previous line should raise a KeyError"
 65 |         except KeyError:
 66 |             pass
 67 | 
 68 |     def test_missing(self):
 69 |         """
 70 |         Make sure that unsupported dict attributes are actually missing.
 71 |         """
 72 |         db = self.db
 73 | 
 74 |         try:
 75 |             db.clear()
 76 |             assert 0
 77 |         except NotImplementedError:
 78 |             pass
 79 | 
 80 |         try:
 81 |             db.update({})
 82 |             assert 0
 83 |         except NotImplementedError:
 84 |             pass
 85 | 
 86 |         try:
 87 |             db.setdefault(None)
 88 |             assert 0
 89 |         except NotImplementedError:
 90 |             pass
 91 | 
 92 |         try:
 93 |             db.pop()
 94 |             assert 0
 95 |         except NotImplementedError:
 96 |             pass
 97 | 
 98 |         try:
 99 |             db.popitem()
100 |             assert 0
101 |         except NotImplementedError:
102 |             pass
103 | 


--------------------------------------------------------------------------------
/screed/tests/test_dna.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import screed
 3 | from screed.DBConstants import fileExtension
 4 | 
 5 | 
 6 | class Test_dna(object):
 7 | 
 8 |     """Tests the dna module of screed"""
 9 |     def test_is_DNA(args):
10 |         valid_DNA_str = "ATCCG"
11 |         invalid_DNA_str = "ATXXG"
12 |         assert screed.dna.is_DNA(valid_DNA_str)
13 |         assert not screed.dna.is_DNA(invalid_DNA_str)
14 | 
15 |     def test_complement(args):
16 |         dna = "ATCCG"
17 |         comp = "TAGGC"
18 |         assert screed.dna.complement(dna) == comp
19 | 
20 |     def test_reverse(args):
21 |         dna = "ATCCG"
22 |         reverse = "GCCTA"
23 |         assert screed.dna.reverse(dna) == reverse
24 | 
25 |     def test_reverse_complement(args):
26 |         dna = "ATCCG"
27 |         reverse_complement = "CGGAT"
28 |         assert screed.dna.reverse_complement(dna) == reverse_complement
29 | 


--------------------------------------------------------------------------------
/screed/tests/test_fasta.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, unicode_literals
  2 | import screed
  3 | from screed.DBConstants import fileExtension
  4 | from screed.screedRecord import write_fastx
  5 | import os
  6 | from io import StringIO
  7 | from io import BytesIO
  8 | from . import screed_tst_utils as utils
  9 | import shutil
 10 | 
 11 | 
 12 | class FakeRecord(object):
 13 |     """Bare attribute holder; tests below pass instances to write_fastx."""
 14 |     pass
 15 | 
 16 | 
 17 | def test_new_record():
 18 |     # test for a bug where the record dict was not reset after each
 19 |     # sequence load, leading to all records being identical if you
 20 |     # kept a handle on the returned dictionary.
 21 | 
 22 |     s = StringIO(">1\nACTG\n>2\nACGG\n")
 23 | 
 24 |     records = list(iter(screed.fasta.fasta_iter(s)))
 25 |     assert records[0]['name'] == '1'
 26 |     assert records[1]['name'] == '2'
 27 |     assert not hasattr(records[0], 'accuracy')   # check for legacy attribute
 28 | 
 29 | 
 30 | class Test_fasta(object):
 31 |     """Checks a ScreedDB built from test.fa; test_fa_recover subclasses it."""
 32 |     def setup_method(self):
 33 |         """Copy test.fa to a temp path, build its db and open it."""
 34 |         self._testfa = utils.get_temp_filename('test.fa')
 35 |         shutil.copy(utils.get_test_data('test.fa'), self._testfa)
 36 |         screed.read_fasta_sequences(self._testfa)
 37 |         self.db = screed.ScreedDB(self._testfa)
 38 | 
 39 |     def teardown_method(self):
 40 |         os.unlink(self._testfa + fileExtension)  # delete the generated db file
 41 | 
 42 |     def test_length(self):
 43 |         assert len(self.db) == 22  # record count expected for test.fa
 44 | 
 45 |     def test_keys(self):  # iterating the db yields record names
 46 |         for key in self.db:
 47 |             assert key == self.db[key].name
 48 | 
 49 |     def test_id_retrieval(self):  # lookup by name and by id must agree
 50 |         for key in self.db:
 51 |             record = self.db[key]
 52 |             intRcrd = self.db.loadRecordByIndex(record.id)
 53 |             assert record == intRcrd
 54 | 
 55 |     def test_length_2(self):
 56 |         """len() of a record equals the length of its sequence."""
 57 |         read = self.db[self.db.keys()[0]]
 58 |         assert len(read) == len(read.sequence)
 59 | 
 60 |     def test_contains_front(self):
 61 |         first = self.db[self.db.keys()[0]]  # record behind the first key
 62 |         assert first.id == 0
 63 |         assert first.name == 'ENSMICT00000012722'
 64 |         assert first.description == 'cdna:pseudogene scaffold:micMur1:'\
 65 |             'scaffold_185008:9:424:1 gene:ENSMICG00000012730'
 66 |         assert str(first.sequence).startswith('TGCAGAAAATATCAAGAGTCAGC'
 67 |                                               'AGAAAAACTATACAAGGGCTGGT'
 68 |                                               'ATTTTGATTATTCT')
 69 | 
 70 |     def test_contains_middle(self):
 71 |         middle = self.db[self.db.keys()[10]]  # record behind the 11th key
 72 |         assert middle.id == 10
 73 |         assert middle.name == 'ENSMICT00000012078'
 74 |         assert middle.description == 'cdna:pseudogene scaffold:micMur1'\
 75 |             ':scaffold_180699:3:774:-1 gene:ENSMICG00000012085'
 76 |         assert str(middle.sequence).startswith('GCGCACTCCCAGTGGCTACCCA'
 77 |                                                'CGGCAGGAGGCGGCGGCAGTGA'
 78 |                                                'CTGGGCCGGCGGCCCG')
 79 | 
 80 |     def test_contains_end(self):
 81 |         end = self.db[self.db.keys()[21]]  # record behind the last (22nd) key
 82 |         assert end.id == 21
 83 |         assert end.name == 'ENSMICT00000003880'
 84 |         assert end.description == 'cdna:novel scaffold:micMur1:scaffol'\
 85 |             'd_175819:130:631:1 gene:ENSMICG00000003884'
 86 |         assert str(end.sequence).startswith('ATGCTGCCTAAGTTTGACCCCAACG'
 87 |                                             'CGATCAAAGTCATGTACCTGAGGTG'
 88 |                                             'CACGGGTGGC')
 89 | 
 90 |     def test_contains(self):
 91 |         """Every iterated key is a member; 'FOO' is absent and get() is None."""
 92 |         for k in self.db:
 93 |             assert k in self.db
 94 | 
 95 |         assert self.db.get('FOO') is None
 96 |         assert 'FOO' not in self.db
 97 | 
 98 |     def test_iterv(self):
 99 |         """Every record reachable by key also appears in itervalues()."""
100 |         entries = []
101 |         for entry in self.db:
102 |             entries.append(self.db[entry])
103 |         ivalues = list(self.db.itervalues())
104 |         assert all(entry in ivalues for entry in entries)
105 | 
106 |     def test_iteri(self):  # iteritems() pairs must match lookups by name
107 |         for id, entry in self.db.iteritems():
108 |             assert id == self.db[entry.name].id
109 |             assert entry == self.db[entry.name]
110 | 
111 | 
112 | class Test_fasta_whitespace(object):
113 | 
114 |     def setup_method(self):
115 |         self._testfa = utils.get_temp_filename('test-whitespace.fa')
116 |         shutil.copy(utils.get_test_data('test-whitespace.fa'), self._testfa)
117 | 
118 |         screed.read_fasta_sequences(self._testfa)
119 |         self.db = screed.ScreedDB(self._testfa)
120 | 
121 |     def test_for_omitted_record(self):
122 |         assert 'ENSMICT00000012401' in self.db
123 | 
124 |     def teardown_method(self):
125 |         os.unlink(self._testfa + fileExtension)
126 | 
127 | 
128 | def test_output_sans_desc():
129 |     read = FakeRecord()
130 |     read.name = 'foo'
131 |     read.sequence = 'ATCG'
132 | 
133 |     fileobj = BytesIO()
134 |     write_fastx(read, fileobj)
135 |     assert fileobj.getvalue().decode('utf-8') == '>foo\nATCG\n'
136 | 
137 | 
138 | def test_output_with_desc():
139 |     read = FakeRecord()
140 |     read.name = 'foo'
141 |     read.description = 'bar'
142 |     read.sequence = 'ATCG'
143 | 
144 |     fileobj = BytesIO()
145 |     write_fastx(read, fileobj)
146 |     assert fileobj.getvalue().decode('utf-8') == '>foo bar\nATCG\n'
147 | 
148 | 
149 | def test_output_two_reads():
150 |     fileobj = BytesIO()
151 |     for i in range(2):
152 |         read = FakeRecord()
153 |         read.name = 'seq{}'.format(i)
154 |         read.sequence = 'GATTACA' * (i + 1)
155 |         write_fastx(read, fileobj)
156 |     testoutput = '>seq0\nGATTACA\n>seq1\nGATTACAGATTACA\n'
157 |     assert fileobj.getvalue().decode('utf-8') == testoutput
158 | 
159 | 
160 | def test_fasta_slicing():
161 |     testfa = utils.get_temp_filename('test.fa')
162 |     shutil.copy(utils.get_test_data('test.fa'), testfa)
163 | 
164 |     with screed.open(testfa) as sequences:
165 |         record = next(sequences)
166 | 
167 |     trimmed = record[:10]
168 |     assert trimmed['sequence'] == "TGCAGAAAAT"
169 | 
170 |     for s in (slice(5, 10), slice(2, 26), slice(5, -1, 2),
171 |               slice(-2, -10, 1), slice(-1, 5, 2), slice(5)):
172 |         trimmed = record[s]
173 | 
174 |         assert trimmed['name'] == record['name']
175 |         assert trimmed.name == record.name
176 | 
177 |         assert trimmed['description'] == record['description']
178 |         assert trimmed.description == record.description
179 | 
180 |         assert trimmed['sequence'] == record['sequence'][s]
181 |         assert trimmed.sequence == record.sequence[s]
182 | 


--------------------------------------------------------------------------------
/screed/tests/test_fasta_recover.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from . import test_fasta
 3 | import os
 4 | import screed
 5 | from screed.DBConstants import fileExtension
 6 | from . import screed_tst_utils as utils
 7 | import shutil
 8 | 
 9 | 
10 | class test_fa_recover(test_fasta.Test_fasta):
11 | 
12 |     def setup_method(self):
13 |         self._fileName = utils.get_temp_filename('fastaRecovery')
14 | 
15 |         self._testfa = utils.get_temp_filename('test.fa')
16 |         shutil.copy(utils.get_test_data('test.fa'), self._testfa)
17 | 
18 |         screed.read_fasta_sequences(self._testfa)
19 |         screed.ToFasta(self._testfa, self._fileName)
20 |         screed.read_fasta_sequences(self._fileName)
21 |         self.db = screed.ScreedDB(self._fileName)
22 | 
23 |     def teardown_method(self):
24 |         os.unlink(self._fileName)
25 |         os.unlink(self._fileName + fileExtension)
26 |         os.unlink(self._testfa + fileExtension)
27 | 


--------------------------------------------------------------------------------
/screed/tests/test_fastq.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, unicode_literals
  2 | import screed
  3 | from screed.DBConstants import fileExtension
  4 | from screed.screedRecord import write_fastx
  5 | import os
  6 | from io import StringIO
  7 | from io import BytesIO
  8 | from . import screed_tst_utils as utils
  9 | import shutil
 10 | import pytest
 11 | 
 12 | 
 13 | class FakeRecord(object):
 14 |     """Bare attribute holder; tests below pass instances to write_fastx."""
 15 |     pass
 16 | 
 17 | 
 18 | def test_new_record():
 19 |     # test for a bug where the record dict was not reset after each
 20 |     # sequence load, leading to all records being identical if you
 21 |     # kept a handle on the returned dictionary.
 22 | 
 23 |     s = StringIO("@1\nACTG\n+\nAAAA\n@2\nACGG\n+\nAAAA\n")
 24 | 
 25 |     records = list(iter(screed.fastq.fastq_iter(s)))
 26 |     assert records[0]['name'] == '1'
 27 |     assert records[1]['name'] == '2'
 28 | 
 29 | 
 30 | def test_parse_description_true():
 31 |     # test for a bug where the record dict was not reset after each
 32 |     # sequence load, leading to all records being identical if you
 33 |     # kept a handle on the returned dictionary.
 34 | 
 35 |     s = StringIO("@1 FOO\nACTG\n+\nAAAA\n@2\nACGG\n+\nAAAA\n")
 36 | 
 37 |     records = list(iter(screed.fastq.fastq_iter(s, parse_description=True)))
 38 |     assert records[0]['name'] == '1'
 39 |     assert records[1]['name'] == '2'
 40 | 
 41 | 
 42 | def test_parse_description_false():
 43 |     # test for a bug where the record dict was not reset after each
 44 |     # sequence load, leading to all records being identical if you
 45 |     # kept a handle on the returned dictionary.
 46 | 
 47 |     s = StringIO("@1 FOO\nACTG\n+\nAAAA\n@2\nACGG\n+\nAAAA\n")
 48 | 
 49 |     records = list(iter(screed.fastq.fastq_iter(s, parse_description=False)))
 50 |     assert records[0]['name'] == '1 FOO'
 51 |     assert records[1]['name'] == '2'
 52 | 
 53 |     # also is default behavior
 54 |     s = StringIO("@1 FOO\nACTG\n+\nAAAA\n@2\nACGG\n+\nAAAA\n")
 55 | 
 56 |     records = list(iter(screed.fastq.fastq_iter(s)))
 57 |     assert records[0]['name'] == '1 FOO'
 58 |     assert records[1]['name'] == '2'
 59 | 
 60 | 
 61 | class Test_fastq(object):
 62 |     """Checks a db built from test.fastq; test_fq_recover subclasses it."""
 63 |     def setup_method(self):
 64 |         """Copy test.fastq to a temp path, build its db and open it."""
 65 |         self._testfq = utils.get_temp_filename('test.fastq')
 66 |         shutil.copy(utils.get_test_data('test.fastq'), self._testfq)
 67 |         screed.read_fastq_sequences(self._testfq)
 68 |         self.db = screed.ScreedDB(self._testfq)
 69 | 
 70 |     def teardown_method(self):
 71 |         os.unlink(self._testfq + fileExtension)  # delete the generated db file
 72 | 
 73 |     def test_length(self):
 74 |         assert len(self.db) == 125  # record count expected for test.fastq
 75 | 
 76 |     def test_keys(self):  # iterating the db yields record names
 77 |         for key in self.db:
 78 |             assert key == self.db[key].name
 79 | 
 80 |     def test_id_retrieval(self):  # lookup by name and by id must agree
 81 |         for key in self.db:
 82 |             record = self.db[key]
 83 |             intRcrd = self.db.loadRecordByIndex(record.id)
 84 |             assert record == intRcrd
 85 | 
 86 |     def test_contains_front(self):
 87 |         first = self.db[self.db.keys()[0]]  # record behind the first key
 88 |         assert first.id == 0
 89 |         assert first.name == 'HWI-EAS_4_PE-FC20GCB:2:1:492:573/2'
 90 |         assert first.sequence == 'ACAGCAAAATTGTGATTGAGGATGAAGAACTGCTGT'
 91 |         assert first.quality == 'AA7AAA3+AAAAAA.AAA.;7;AA;;;;*;<1;<<<'
 92 | 
 93 |     def test_contains_middle(self):
 94 |         middle = self.db[self.db.keys()[62]]  # record behind the 63rd key
 95 |         assert middle.id == 62
 96 |         assert middle.name == 'HWI-EAS_4_PE-FC20GCB:2:1:245:483/2'
 97 |         assert middle.sequence == 'TGTCGAGCAAAGCAAAACAGGCGTAAAAATTGCCAT'
 98 |         assert middle.quality == 'AAAAAAAAAAAAAAAAAAAAA>AAAAAAAA?9>6><'
 99 | 
100 |     def test_contains_end(self):
101 |         end = self.db[self.db.keys()[124]]  # record behind the last key
102 |         assert end.id == 124
103 |         assert end.name == 'HWI-EAS_4_PE-FC20GCB:2:1:350:588/2'
104 |         assert end.sequence == 'GGTACAAAATAGATGCTGGACTCTCCGAATCCTATA'
105 |         assert end.quality == ';?5AAAAAAAAAA?A??;?AA;AAA>AAAA?4?844'
106 | 
107 |     def test_contains(self):
108 |         """Every iterated key is a member; 'FOO' is absent and get() is None."""
109 |         for k in self.db:
110 |             assert k in self.db
111 | 
112 |         assert self.db.get('FOO') is None
113 |         assert 'FOO' not in self.db
114 | 
115 |     def test_iterv(self):
116 |         """Every record reachable by key also appears in itervalues()."""
117 |         entries = []
118 |         for entry in self.db:
119 |             entries.append(self.db[entry])
120 |         ivalues = list(self.db.itervalues())
121 |         assert all(entry in ivalues for entry in entries)
122 | 
123 |     def test_iteri(self):  # iteritems() pairs must match lookups by name
124 |         for id, entry in self.db.iteritems():
125 |             assert id == self.db[entry.name].id
126 |             assert entry == self.db[entry.name]
127 | 
128 | 
129 | def test_output_sans_desc():
130 |     read = FakeRecord()
131 |     read.name = 'foo'
132 |     read.sequence = 'ATCG'
133 |     read.quality = '####'
134 | 
135 |     fileobj = BytesIO()
136 |     write_fastx(read, fileobj)
137 |     assert fileobj.getvalue().decode('utf-8') == '@foo\nATCG\n+\n####\n'
138 | 
139 | 
140 | def test_output_with_desc():
141 |     read = FakeRecord()
142 |     read.name = 'foo'
143 |     read.description = 'bar'
144 |     read.sequence = 'ATCG'
145 |     read.quality = '####'
146 | 
147 |     fileobj = BytesIO()
148 |     write_fastx(read, fileobj)
149 |     assert fileobj.getvalue().decode('utf-8') == '@foo bar\nATCG\n+\n####\n'
150 | 
151 | 
152 | def test_output_two_reads():
153 |     fileobj = BytesIO()
154 |     for i in range(2):
155 |         read = FakeRecord()
156 |         read.name = 'seq{}'.format(i)
157 |         read.sequence = 'GATTACA' * (i + 1)
158 |         read.quality = '#######' * (i + 1)
159 |         write_fastx(read, fileobj)
160 |     testoutput = ('@seq0\nGATTACA\n+\n#######\n'
161 |                   '@seq1\nGATTACAGATTACA\n+\n##############\n')
162 |     assert fileobj.getvalue().decode('utf-8') == testoutput
163 | 
164 | 
165 | def test_output_bad_mode():
166 |     read = FakeRecord()
167 |     read.name = 'foo'
168 |     read.description = 'bar'
169 |     read.sequence = 'ATCG'
170 |     read.quality = '####'
171 | 
172 |     fileobj = StringIO()
173 |     with pytest.raises(AttributeError) as ae:
174 |         write_fastx(read, fileobj)
175 |     assert 'cannot call "write_fastx" on object' in str(ae)
176 | 
177 | 
178 | def test_fastq_slicing():
179 |     testfq = utils.get_temp_filename('test.fastq')
180 |     shutil.copy(utils.get_test_data('test.fastq'), testfq)
181 | 
182 |     with screed.open(testfq) as sequences:
183 |         record = next(sequences)
184 | 
185 |     trimmed = record[:10]
186 |     assert trimmed['sequence'] == "ACAGCAAAAT"
187 |     assert trimmed['quality'] == "AA7AAA3+AA"
188 | 
189 |     for s in (slice(5, 10), slice(2, 26), slice(5, -1, 2),
190 |               slice(-2, -10, 1), slice(-1, 5, 2), slice(5)):
191 |         trimmed = record[s]
192 | 
193 |         assert trimmed['name'] == record['name']
194 |         assert trimmed.name == record.name
195 | 
196 |         assert trimmed['sequence'] == record['sequence'][s]
197 |         assert trimmed.sequence == record.sequence[s]
198 | 
199 |         assert trimmed['quality'] == record['quality'][s]
200 |         assert trimmed.quality == record.quality[s]
201 | 


--------------------------------------------------------------------------------
/screed/tests/test_fastq_recover.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from . import test_fastq
 3 | import os
 4 | import screed
 5 | from screed.DBConstants import fileExtension
 6 | from . import screed_tst_utils as utils
 7 | import shutil
 8 | 
 9 | 
10 | class test_fq_recover(test_fastq.Test_fastq):
11 | 
12 |     def setup_method(self):
13 |         self._fileName = utils.get_temp_filename('fastqRecovery')
14 | 
15 |         self._testfq = utils.get_temp_filename('test.fastq')
16 |         shutil.copy(utils.get_test_data('test.fastq'), self._testfq)
17 | 
18 |         screed.read_fastq_sequences(self._testfq)
19 |         screed.ToFastq(self._testfq, self._fileName)
20 |         screed.read_fastq_sequences(self._fileName)
21 |         self.db = screed.ScreedDB(self._fileName)
22 | 
23 |     def teardown_method(self):
24 |         os.unlink(self._fileName)
25 |         os.unlink(self._fileName + fileExtension)
26 |         os.unlink(self._testfq + fileExtension)
27 | 


--------------------------------------------------------------------------------
/screed/tests/test_hava_methods.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import screed
 3 | import screed.seqparse
 4 | from screed.DBConstants import fileExtension
 5 | import os
 6 | from . import screed_tst_utils as utils
 7 | import shutil
 8 | 
 9 | testha = utils.get_temp_filename('test.hava')  # shared by all tests below
10 | shutil.copy(utils.get_test_data('test.hava'), testha)  # runs at import time
11 | 
12 | 
13 | class test_hava(object):
14 | 
15 |     def setup_method(self):
16 |         screed.seqparse.read_hava_sequences(testha)
17 |         self._db = screed.ScreedDB(testha)
18 | 
19 |     def teardown_method(self):
20 |         b = 7
21 |         # os.unlink(testha + fileExtension)
22 | 
23 |     def test_contains(self):
24 |         assert 'test_006' in self._db
25 | 
26 |     def test_beginning_key_retrieval(self):
27 |         result = self._db['test_000']
28 |         assert result.hava == 'test_000'
29 |         assert result.quarzk == 'ACGGTGACGGTCACCGTCGACGGCCCAAGCCCATCGAACG'\
30 |             'TACCACCCCCACCTATCGTCACGCTGGTGGAGAGCCAATG'
31 |         assert result.muchalo == 'AFPPCLHBCCILGMMOCHKNNDBKCCPNHAMKJOCCDJA'\
32 |             'OEPNMHFHCBAJOKEMMMBHCPHIOAEPFFCAOJPGIMKGK'
33 |         assert result.fakours == '218583165871861127719451483455294521865'\
34 |             '68176931571171542294878855181415261425688'
35 |         assert result.selimizicka == 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'\
36 |             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
37 |         assert result.marshoon == 'C7AF246AC7AAEABE5A557FCBC6FD5F5263BCDE'\
38 |             '4E745BEF1GG7DD1AB511GBC63A4GF1F4E1A154B35D'
39 | 
40 |     def test_middle_key_retrieval(self):
41 |         result = self._db['test_0063']
42 |         assert result.hava == 'test_0063'
43 |         assert result.quarzk == 'CAACACGATCAAGTTTGGTAAGAATTCCGCCTTAAGCTTT'\
44 |             'CTAGAACGATAGTTGCCCCCAATCTGGTTCGAAATCTCTT'
45 |         assert result.muchalo == 'GMDAPLMOOFANDHHMLBPIKGHIAFFFOABFMNNJNIJ'\
46 |             'ILEEFEPOCAJLNDLIFBPMGKOFJIEFAHNJPIOFAJMLM'
47 |         assert result.fakours == '392363971393898522756138876485334274384'\
48 |             '39122136418369146118333919885587613673488'
49 |         assert result.selimizicka == 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'\
50 |             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
51 |         assert result.marshoon == 'FC25E2CFC2BAFA7A2AA4757F3GFFFEE37G7752'\
52 |             'FCDBAEADBA1AC7374FB5C15552E6E2GG6GFF62C6GE'
53 | 
54 |     def test_end_key_retrieval(self):
55 |         result = self._db['test_00124']
56 |         assert result.hava == 'test_00124'
57 |         assert result.quarzk == 'ATCGCAACCGTTTCCCCTATCTGGCAATTGAATCCGCGTC'\
58 |             'CTAAAACGAAAGCTTATCCCTGGCGAGGCACGCTAGGCCT'
59 |         assert result.muchalo == 'CIHNCECANFNLKGCHNOEHJDHADHPAEMMNKGMMMPD'\
60 |             'OBMOCKNBCMCPHEBEOINHMBMMGCHEMOIOAPEFPDDJP'
61 |         assert result.fakours == '327364511483537131695325595876269716778'\
62 |             '14946924334424648676283848861393812686731'
63 |         assert result.selimizicka == 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'\
64 |             'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
65 |         assert result.marshoon == '4FE5FDD76CC5DE4DC2F25AA2GFBD7BEG326C6D'\
66 |             '7AB5B71GA67BAFD63AE1A562CDC1C2D157G6EF17CD'
67 | 


--------------------------------------------------------------------------------
/screed/tests/test_open.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2008-2015, Michigan State University
  2 | """
  3 | Test `screed.open`.
  4 | """
  5 | 
  6 | from __future__ import absolute_import
  7 | 
  8 | import os.path
  9 | import sys
 10 | import subprocess
 11 | 
 12 | from . import screed_tst_utils as utils
 13 | import screed
 14 | import screed.openscreed
 15 | 
 16 | 
 17 | def test_empty_open():
 18 |     filename = utils.get_test_data('empty.fa')
 19 |     assert len(list(screed.open(filename))) == 0
 20 | 
 21 | 
 22 | def test_open_maps_dash():
 23 |     """Test mapping of '-'."""
 24 |     #  pylint: disable=protected-access
 25 |     filename = '-'
 26 |     mapped = screed.openscreed._normalize_filename(filename)
 27 | 
 28 |     assert '/dev/stdin' == mapped
 29 | 
 30 | 
 31 | def test_open_stdin():
 32 |     """Test feeding data through stdin.
 33 | 
 34 |     Uses a subprocess with the data file directlyused as stdin."""
 35 |     filename1 = utils.get_test_data('test.fa')
 36 |     command = ["python", "-c", "from __future__ import print_function;"
 37 |                "import screed; print(list(screed.open('-')))"]
 38 |     with open(filename1, 'rb') as data_file:
 39 |         output = subprocess.Popen(command,
 40 |                                   stdin=data_file, universal_newlines=True,
 41 |                                   stdout=subprocess.PIPE).communicate()[0]
 42 |         assert "'name': 'ENSMICT00000012722'" \
 43 |             or "'name': u'ENSMICT00000012722'" in output, output
 44 | 
 45 | 
 46 | def test_simple_open():
 47 |     filename = utils.get_test_data('test.fa')
 48 | 
 49 |     n = -1
 50 |     for n, record in enumerate(screed.open(filename, parse_description=True)):
 51 |         assert record.name == 'ENSMICT00000012722'
 52 |         break
 53 |     assert n == 0, n
 54 | 
 55 | 
 56 | def test_simple_open_fq():
 57 |     filename = utils.get_test_data('test.fastq')
 58 | 
 59 |     n = -1
 60 |     for n, record in enumerate(screed.open(filename)):
 61 |         assert record.name == 'HWI-EAS_4_PE-FC20GCB:2:1:492:573/2'
 62 |         break
 63 |     assert n == 0
 64 | 
 65 | 
 66 | def test_gz_open():
 67 |     filename1 = utils.get_test_data('test.fa')
 68 |     filename2 = utils.get_test_data('test.fa.gz')
 69 |     for n, (r1, r2) in enumerate(zip(screed.open(filename1),
 70 |                                      screed.open(filename2))):
 71 |         assert r1.name == r2.name
 72 | 
 73 |     assert n > 0
 74 | 
 75 | 
 76 | def test_bz2_open():
 77 |     filename1 = utils.get_test_data('test.fa')
 78 |     filename2 = utils.get_test_data('test.fa.bz2')
 79 |     for n, (r1, r2) in enumerate(zip(screed.open(filename1),
 80 |                                      screed.open(filename2))):
 81 |         assert r1.name == r2.name
 82 | 
 83 |     assert n > 0
 84 | 
 85 | 
 86 | def test_gz_open_fastq():
 87 |     filename1 = utils.get_test_data('test.fastq')
 88 |     filename2 = utils.get_test_data('test.fastq.gz')
 89 |     for n, (r1, r2) in enumerate(zip(screed.open(filename1),
 90 |                                      screed.open(filename2))):
 91 |         assert r1.name == r2.name
 92 | 
 93 |     assert n > 0
 94 | 
 95 | 
 96 | def test_unknown_fileformat():
 97 | 
 98 |     try:
 99 |         screed.open(__file__)
100 |     except ValueError as err:
101 |         assert "unknown file format" in str(err)
102 | 


--------------------------------------------------------------------------------
/screed/tests/test_open_cm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2008-2015, Michigan State University
 2 | """
 3 | Test the use of `screed.open` as a ContextManager.
 4 | """
 5 | 
 6 | from . import screed_tst_utils as utils
 7 | import screed
 8 | import screed.openscreed
 9 | 
10 | 
11 | def test_empty_open():
12 |     filename = utils.get_test_data('empty.fa')
13 |     with screed.open(filename) as f:
14 |         assert len(list(f)) == 0
15 | 
16 | 
17 | def test_simple_open():
18 |     filename = utils.get_test_data('test.fa')
19 | 
20 |     n = -1
21 |     with screed.open(filename, parse_description=True) as f:
22 |         for n, record in enumerate(f):
23 |             assert record.name == 'ENSMICT00000012722'
24 |             break
25 | 
26 |         assert n == 0, n
27 | 
28 | 
29 | def test_simple_close():
30 |     filename = utils.get_test_data('test.fa')
31 | 
32 |     n = -1
33 |     f = screed.open(filename, parse_description=True)
34 |     for n, record in enumerate(f):
35 |         assert record.name == 'ENSMICT00000012722'
36 |         break
37 | 
38 |     assert n == 0, n
39 |     f.close()
40 | 
41 | 
42 | def test_simple_open_fq():
43 |     filename = utils.get_test_data('test.fastq')
44 | 
45 |     n = -1
46 |     with screed.open(filename) as f:
47 |         for n, record in enumerate(f):
48 |             assert record.name == 'HWI-EAS_4_PE-FC20GCB:2:1:492:573/2'
49 |             break
50 | 
51 |         assert n == 0
52 | 
53 | 
54 | def test_gz_open():
55 |     filename1 = utils.get_test_data('test.fa')
56 |     filename2 = utils.get_test_data('test.fa.gz')
57 |     with screed.open(filename1) as f1, screed.open(filename2) as f2:
58 |         for n, (r1, r2) in enumerate(zip(f1, f2)):
59 |             assert r1.name == r2.name
60 | 
61 |         assert n > 0
62 | 
63 | 
64 | def test_bz2_open():
65 |     filename1 = utils.get_test_data('test.fa')
66 |     filename2 = utils.get_test_data('test.fa.bz2')
67 |     with screed.open(filename1) as f1, screed.open(filename2) as f2:
68 |         for n, (r1, r2) in enumerate(zip(f1, f2)):
69 |             assert r1.name == r2.name
70 | 
71 |         assert n > 0
72 | 
73 | 
74 | def test_gz_open_fastq():
75 |     filename1 = utils.get_test_data('test.fastq')
76 |     filename2 = utils.get_test_data('test.fastq.gz')
77 |     with screed.open(filename1) as f1, screed.open(filename2) as f2:
78 |         for n, (r1, r2) in enumerate(zip(f1, f2)):
79 |             assert r1.name == r2.name
80 | 
81 |         assert n > 0
82 | 
83 | 
84 | def test_unknown_fileformat():
85 |     try:
86 |         with screed.open(__file__):
87 |             pass
88 |     except ValueError as err:
89 |         assert "unknown file format" in str(err)
90 | 


--------------------------------------------------------------------------------
/screed/tests/test_pygr_api.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, unicode_literals
 2 | 
 3 | import pytest
 4 | pygr = pytest.importorskip("pygr")
 5 | 
 6 | import screed  # nopep8
 7 | from screed.DBConstants import fileExtension  # nopep8
 8 | from screed.pygr_api import ScreedSequenceDB, ScreedSequenceDB_ByIndex  # nopep8
 9 | from pickle import dump, load  # nopep8
10 | from io import StringIO  # nopep8
11 | import os  # nopep8
12 | 
13 | testfa = os.path.join(os.path.dirname(__file__), 'test.fa')  # TODO verify path
14 | 
15 | 
16 | def setup():  # presumably writes testfa + fileExtension; see teardown()
17 |     screed.read_fasta_sequences(testfa)
18 | 
19 | 
20 | def teardown():  # delete the testfa + fileExtension file from disk
21 |     os.unlink(testfa + fileExtension)
22 | 
23 | 
24 | def test_name_iterator_methods():
25 |     db = ScreedSequenceDB(testfa)
26 | 
27 |     # test the various iterator methods for equal results from db
28 |     a = sorted([(x, db[x]) for x in db])
29 |     b = sorted([i for i in db.iteritems()])
30 |     c = sorted([(v.name, v) for v in db.itervalues()])
31 | 
32 |     assert a == b
33 |     assert a == c
34 | 
35 | 
36 | def test_index_iterator_methods():
37 |     db = ScreedSequenceDB_ByIndex(testfa)
38 | 
39 |     # test the various iterator methods for equal results from db
40 |     m = sorted([(x, db[x]) for x in db])
41 |     n = sorted([i for i in db.iteritems()])
42 |     o = sorted([(v.record.id, v) for v in db.itervalues()])
43 | 
44 |     assert m == n
45 |     assert m == o, (m, o)
46 | 
47 | 
48 | def test_name_index_equality():
49 |     db1 = ScreedSequenceDB(testfa)
50 |     db2 = ScreedSequenceDB_ByIndex(testfa)
51 | 
52 |     # must use something other than the obj itself for comparison...
53 |     v1 = sorted([(v.name, v.seq) for v in db1.itervalues()])
54 |     v2 = sorted([(v.name, v.seq) for v in db2.itervalues()])
55 |     assert v1 == v2, (v1, v2)
56 | 
57 | 
58 | def test_seqinfodict_by_name():
59 |     db1 = ScreedSequenceDB(testfa)
60 |     sd = db1.seqInfoDict
61 | 
62 |     m = sorted([y.id for (x, y) in sd.iteritems()])
63 |     n = sorted([x.id for x in sd.itervalues()])
64 | 
65 |     assert m == n, (m, n)
66 | 
67 | 
68 | def test_seqinfodict_by_index():
69 |     db1 = ScreedSequenceDB_ByIndex(testfa)
70 |     sd = db1.seqInfoDict
71 | 
72 |     m = sorted([x for (x, y) in sd.iteritems()])
73 |     n = sorted([x for x in sd.iterkeys()])
74 | 
75 |     assert m == n, (m, n)
76 | 
77 | 
78 | def test_pickle_ByName():
79 |     db = ScreedSequenceDB(testfa)
80 |     ofp = StringIO()
81 | 
82 |     dump(db, ofp)
83 | 
84 |     ifp = StringIO(ofp.getvalue())
85 |     db2 = load(ifp)
86 |     assert db.filepath == db2.filepath
87 | 
88 | 
89 | def test_pickle_ByIndex():
90 |     db = ScreedSequenceDB_ByIndex(testfa)
91 |     ofp = StringIO()
92 | 
93 |     dump(db, ofp)
94 | 
95 |     ifp = StringIO(ofp.getvalue())
96 |     db2 = load(ifp)
97 |     assert db.filepath == db2.filepath
98 | 


--------------------------------------------------------------------------------
/screed/tests/test_record.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, unicode_literals, print_function
 2 | from screed import Record
 3 | import pytest
 4 | 
 5 | 
 6 | def test_create_quality_none():
 7 |     r = Record(name='foo', sequence='ATGACG', quality=None)
 8 |     assert not hasattr(r, 'quality')
 9 | 
10 | 
11 | def test_len():
12 |     r = Record(name='foo', sequence='ATGACG')
13 |     assert len(r) == 6
14 | 
15 | 
16 | # copied over from khmer tests/test_read_parsers.py
17 | def test_read_type_basic():
18 |     name = "895:1:1:1246:14654 1:N:0:NNNNN"
19 |     sequence = "ACGT"
20 |     r = Record(name, sequence)
21 | 
22 |     assert r.name == name
23 |     assert r.sequence == sequence
24 |     assert not hasattr(r, 'quality'), x
25 |     assert not hasattr(r, 'annotations'), x
26 | 
27 | 
28 | # copied over from khmer tests/test_read_parsers.py
29 | def test_read_type_attributes():
30 |     r = Record(sequence='ACGT', quality='good', name='1234', annotations='ann')
31 |     assert r.sequence == 'ACGT'
32 |     assert r.quality == 'good'
33 |     assert r.name == '1234'
34 |     assert r.annotations == 'ann'
35 | 


--------------------------------------------------------------------------------
/screed/tests/test_shell.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from . import test_fasta
  3 | from . import test_fastq
  4 | import os
  5 | import subprocess
  6 | import screed
  7 | from screed.DBConstants import fileExtension
  8 | from . import screed_tst_utils as utils
  9 | import shutil
 10 | 
 11 | 
 12 | class Test_fa_shell_command(test_fasta.Test_fasta):
 13 |     """
 14 |     Tests the functionality of the 'db' command in creating a
 15 |     screed database correctly from the shell
 16 |     """
 17 | 
 18 |     def setup_method(self):
 19 |         thisdir = os.path.dirname(__file__)
 20 | 
 21 |         self._testfa = utils.get_temp_filename('test.fa')
 22 |         shutil.copy(utils.get_test_data('test.fa'), self._testfa)
 23 | 
 24 |         cmd = ['screed', 'db', self._testfa]
 25 |         ret = subprocess.check_call(cmd, stdout=subprocess.PIPE)
 26 |         assert ret == 0, ret
 27 |         self.db = screed.ScreedDB(self._testfa)
 28 | 
 29 |     def teardown_method(self):
 30 |         os.unlink(self._testfa + fileExtension)
 31 | 
 32 | 
 33 | class Test_fq_shell_command(test_fastq.Test_fastq):
 34 | 
 35 |     """
 36 |     Tests the functionality of the 'db' command in creating a
 37 |     screed database correctly from the shell
 38 |     """
 39 | 
 40 |     def setup_method(self):
 41 |         thisdir = os.path.dirname(__file__)
 42 | 
 43 |         self._testfq = utils.get_temp_filename('test.fastq')
 44 |         shutil.copy(utils.get_test_data('test.fastq'), self._testfq)
 45 | 
 46 |         cmd = ['screed', 'db', self._testfq]
 47 |         ret = subprocess.check_call(cmd, stdout=subprocess.PIPE)
 48 |         assert ret == 0, ret
 49 |         self.db = screed.ScreedDB(self._testfq)
 50 | 
 51 |     def teardown_method(self):
 52 |         os.unlink(self._testfq + fileExtension)
 53 | 
 54 | 
 55 | class Test_fa_shell_module(test_fasta.Test_fasta):
 56 | 
 57 |     """
 58 |     Tests the functionality of the 'db' command in creating a
 59 |     screed database correctly from the shell
 60 |     """
 61 | 
 62 |     def setup_method(self):
 63 |         thisdir = os.path.dirname(__file__)
 64 | 
 65 |         self._testfa = utils.get_temp_filename('test.fa')
 66 |         shutil.copy(utils.get_test_data('test.fa'), self._testfa)
 67 | 
 68 |         cmd = ['python', '-m', 'screed', 'db', self._testfa]
 69 |         ret = subprocess.check_call(cmd, stdout=subprocess.PIPE)
 70 |         assert ret == 0, ret
 71 |         self.db = screed.ScreedDB(self._testfa)
 72 | 
 73 |     def teardown_method(self):
 74 |         os.unlink(self._testfa + fileExtension)
 75 | 
 76 | 
 77 | class Test_fq_shell_module(test_fastq.Test_fastq):
 78 | 
 79 |     """
 80 |     Tests the functionality of the 'db' command in creating a
 81 |     screed database correctly from the shell
 82 |     """
 83 | 
 84 |     def setup_method(self):
 85 |         thisdir = os.path.dirname(__file__)
 86 | 
 87 |         self._testfq = utils.get_temp_filename('test.fastq')
 88 |         shutil.copy(utils.get_test_data('test.fastq'), self._testfq)
 89 | 
 90 |         cmd = ['python', '-m', 'screed', 'db', self._testfq]
 91 |         ret = subprocess.check_call(cmd, stdout=subprocess.PIPE)
 92 |         assert ret == 0, ret
 93 |         self.db = screed.ScreedDB(self._testfq)
 94 | 
 95 |     def teardown_method(self):
 96 |         os.unlink(self._testfq + fileExtension)
 97 | 
 98 | 
 99 | class Test_convert_shell(test_fasta.Test_fasta):
100 | 
101 |     """
102 |     Tests the ability to convert a fasta db to a fastq file, parse it into
103 |     a fastq db, save to a fasta file, parse the fasta file into a fasta
104 |     db and then run the fasta suite, all from the command line.
105 |     """
106 | 
107 |     def setup_method(self):
108 | 
109 |         self._fqName = utils.get_temp_filename('fa_to_fq')
110 |         self._faName = utils.get_temp_filename('fq_to_fa')
111 |         self._testfa = utils.get_temp_filename('test.fa')
112 |         shutil.copy(utils.get_test_data('test.fa'), self._testfa)
113 | 
114 |         cmd = ['screed', 'db', self._testfa]
115 |         ret = subprocess.check_call(cmd, stdout=subprocess.PIPE)
116 |         assert ret == 0, ret
117 | 
118 |         cmd = ['screed', 'dump_fastq', self._testfa, self._fqName]
119 |         ret = subprocess.check_call(cmd, stdout=subprocess.PIPE)
120 |         assert ret == 0, ret
121 | 
122 |         cmd = ['screed', 'db', self._fqName]
123 |         ret = subprocess.check_call(cmd, stdout=subprocess.PIPE)
124 |         assert ret == 0, ret
125 | 
126 |         cmd = ['screed', 'dump_fasta', self._fqName, self._faName]
127 |         ret = subprocess.check_call(cmd, stdout=subprocess.PIPE)
128 |         assert ret == 0, ret
129 | 
130 |         cmd = ['screed', 'db', self._faName]
131 |         ret = subprocess.check_call(cmd, stdout=subprocess.PIPE)
132 |         assert ret == 0, ret
133 | 
134 |         self.db = screed.ScreedDB(self._faName)
135 | 
136 |     def teardown_method(self):
137 |         os.unlink(self._fqName)
138 |         os.unlink(self._fqName + fileExtension)
139 |         os.unlink(self._faName)
140 |         os.unlink(self._faName + fileExtension)
141 |         os.unlink(self._testfa + fileExtension)
142 | 


--------------------------------------------------------------------------------
/screed/tests/test_streaming.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2008-2015, Michigan State University
 2 | 
 3 | from __future__ import print_function
 4 | from __future__ import absolute_import
 5 | 
 6 | import tempfile
 7 | import os
 8 | import sys
 9 | import io
10 | import threading
11 | import subprocess
12 | 
13 | import pytest
14 | 
15 | import screed
16 | from . import screed_tst_utils as utils
17 | from . import test_fasta
18 | from . import test_fastq
19 | from screed.DBConstants import fileExtension
20 | 
21 | 
22 | def streamer_reader(ifilename, exception):
23 |     try:
24 |         for read in screed.open(ifilename):
25 |             pass
26 |     except Exception as e:
27 |         exception.append(e)
28 | 
29 | 
30 | def streamer(ifilename):
31 | 
32 |     # Get temp filenames, etc.
33 |     in_dir = tempfile.mkdtemp(prefix="screedtest_")
34 |     fifo = os.path.join(in_dir, 'fifo')
35 |     ifile = io.open(ifilename, 'rb')
36 | 
37 |     # make a fifo to simulate streaming
38 |     os.mkfifo(fifo)
39 | 
40 |     exception = []
41 |     # FIFOs MUST BE OPENED FOR READING BEFORE THEY ARE WRITTEN TO
42 |     # If this isn't done, they will BLOCK and things will hang.
43 |     # rvalues will hold the return from the threaded function
44 |     thread = threading.Thread(target=streamer_reader, args=[fifo, exception])
45 |     thread.start()
46 | 
47 |     fifofile = io.open(fifo, 'wb')
48 |     # read binary to handle compressed files
49 |     chunk = ifile.read(8192)
50 |     while len(chunk) > 0:
51 |         fifofile.write(chunk)
52 |         chunk = ifile.read(8192)
53 | 
54 |     fifofile.close()
55 | 
56 |     thread.join()
57 | 
58 |     if len(exception) > 0:
59 |         raise exception[0]
60 | 
61 | 
62 | def test_stream_fa():  # plain FASTA pushed through a FIFO must parse
63 |     streamer(utils.get_test_data('test.fa'))
64 | 
65 | 
66 | def test_stream_fq():  # plain FASTQ pushed through a FIFO must parse
67 |     streamer(utils.get_test_data('test.fastq'))
68 | 
69 | 
70 | @pytest.mark.xfail()  # gzipped FASTA over a FIFO is expected to fail
71 | def test_stream_fa_gz():
72 |     streamer(utils.get_test_data('test.fa.gz'))
73 | 
74 | 
75 | def test_stream_gz_fail():
76 |     try:
77 |         streamer(utils.get_test_data('test.fastq.gz'))
78 |         assert 0, "This should not work yet"
79 |     except ValueError as err:
80 |         print(str(err))
81 | 
82 | 
83 | @pytest.mark.xfail()  # gzipped FASTQ over a FIFO is expected to fail
84 | def test_stream_fq_gz():
85 |     streamer(utils.get_test_data('test.fastq.gz'))
86 | 
87 | 
88 | def test_stream_fa_bz2():  # bzip2 FASTA pushed through a FIFO must parse
89 |     streamer(utils.get_test_data('test.fa.bz2'))
90 | 
91 | 
92 | def test_stream_fq_bz2():  # bzip2 FASTQ pushed through a FIFO must parse
93 |     streamer(utils.get_test_data('test.fastq.bz2'))
94 | 


--------------------------------------------------------------------------------
/screed/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016, The Regents of the University of California.
 2 | 
 3 | 
 4 | def to_str(line):
 5 |     try:
 6 |         line = line.decode('utf-8')
 7 |     except AttributeError:
 8 |         pass
 9 | 
10 |     return line
11 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = screed
 3 | description = a Python library for loading FASTA and FASTQ sequences
 4 | long_description = file: README.md
 5 | long_description_content_type = text/markdown; charset=UTF-8
 6 | url = https://github.com/dib-lab/screed
 7 | author = Alex Nolley, C. Titus Brown
 8 | author_email = ctbrown@ucdavis.edu
 9 | license = BSD 3-clause
10 | license_file = doc/LICENSE.rst
11 | classifiers =
12 |     Development Status :: 5 - Production/Stable
13 |     Environment :: Console
14 |     Environment :: MacOS X
15 |     Intended Audience :: Science/Research
16 |     License :: OSI Approved :: BSD License
17 |     Natural Language :: English
18 |     Operating System :: POSIX :: Linux
19 |     Operating System :: MacOS :: MacOS X
20 |     Programming Language :: Python :: 3.7
21 |     Programming Language :: Python :: 3.8
22 |     Programming Language :: Python :: 3.9
23 |     Topic :: Scientific/Engineering :: Bio-Informatics
24 | project_urls = 
25 |     Documentation = https://screed.readthedocs.io
26 |     Source = https://github.com/dib-lab/screed
27 |     Tracker = https://github.com/dib-lab/screed/issues
28 | 
29 | [options]
30 | zip_safe = False
31 | packages = find:
32 | platforms = any
33 | include_package_data = True
34 | python_requires = >=3.7
35 | setup_requires =
36 |     setuptools_scm
37 | 
38 | [bdist_wheel]
39 | universal = 1
40 | 
41 | [aliases]
42 | test=pytest
43 | 
44 | [options.entry_points]
45 | console_scripts =
46 |     screed = screed.__main__:main
47 | 
48 | [options.extras_require]
49 | test =
50 |     pytest >= 6.2.2
51 |     pycodestyle
52 |     pytest-cov
53 |     importlib_resources;python_version<'3.9'
54 | all =
55 |     %(test)s
56 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import setuptools
3 | 
4 | if __name__ == "__main__":  # package metadata is declared in setup.cfg
5 |     setuptools.setup()
6 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py38, py39, py310
 3 | minversion = 3.12
 4 | isolated_build = true
 5 | skip_missing_interpreters = true
 6 | 
 7 | [testenv]
 8 | passenv =
 9 |   CI
10 |   GITHUB_ACTION
11 |   GITHUB_REF
12 |   GITHUB_HEAD_REF
13 |   GITHUB_RUN_ID
14 |   GITHUB_SHA
15 |   GITHUB_REPOSITORY
16 | allowlist_externals = make
17 | commands =
18 |   make install-dependencies
19 |   pytest --cov -m 'not known_failing' --cov-report xml
20 |   make pep8
21 |   make doc
22 | deps =
23 |   pytest
24 |   pytest-cov
25 |   sphinx
26 | 
27 | [gh-actions]
28 | python =
29 |   3.8: py38
30 |   3.9: py39
31 |   3.10: py310
32 | 


--------------------------------------------------------------------------------