10 | # Reads: | {{mini_summary_d['n_reads']}} |
11 | {% if mini_summary_d['uniqmapped_pct'] == 'N/A' %}
12 | % of uniquely mapped reads: | N/A |
13 | % of multi-mapped reads: | N/A |
14 | % of unmapped reads: | N/A |
15 | % of filtered reads mapping to genome: | N/A |
16 | {% else %}
17 | % of uniquely mapped reads: | {{'%.2f%%' % mini_summary_d['uniqmapped_pct']}} |
18 | % of multi-mapped reads: | {{'%.2f%%' % mini_summary_d['multimapped_pct']}} |
19 | % of unmapped reads: | {{'%.2f%%' % mini_summary_d['unmapped_pct']}} |
20 | % of filtered reads mapping to genome: | {{'%.2f%%' % mini_summary_d['genomic_read_pct']}} |
21 | {% endif %}
22 | Sequencing saturation rate: | {{'%.2f%%' % mini_summary_d['seq_sat_rate']}} |
23 |   |
24 | # Cells: | {{'%d' % mini_summary_d['n_cells']}} |
25 | Median molecules per cell: | {{'%d' % mini_summary_d['med_molcs_per_cell']}} |
26 | Average reads per cell: | {{'%d' % mini_summary_d['avg_reads_per_cell']}} |
27 | Average reads per molecule: | {{'%.2f' % mini_summary_d['avg_reads_per_molc']}} |
28 | {% if 'mt_rna_fraction' in mini_summary_d %}
29 | % of cells filtered by high mt-RNA content: | {{'%.2f%%' % mini_summary_d['mt_rna_fraction']}} |
30 | {% endif %}
31 |
32 |
33 |
6 | {% for name, c in section.content.items() %}
7 |
8 | {{name}}
9 |
10 | {% if c.keys is defined %}
11 |
12 | {% for k in c.keys %}
13 | {{k}}
14 | {% endfor %}
15 |
16 |
17 | {% for v in c.values %}
18 | {{v}}
19 | {% endfor %}
20 |
21 | {% elif c.text is defined %}
22 |
23 | {{c.text}}
24 |
25 | {% elif c.image is defined %}
26 |
27 |
28 |
29 |
30 | {{c.legend}}
31 |
32 | {% endif %}
33 |
34 | {% endfor %}
35 |
36 | {% endblock %}
--------------------------------------------------------------------------------
/src/seqc/summary/test.py:
--------------------------------------------------------------------------------
1 | import nose2
2 | import unittest
3 | from seqc.summary import summary
4 | from collections import OrderedDict
5 |
6 |
7 | class TestSummary(unittest.TestCase):
8 |
9 | def test_render_section(self):
10 | s1 = summary.Section.from_alignment_summary(
11 | '/var/folders/y3/ysxvl2w921d881nfpvx5ypvh0000gn/T/seqc/test_no_aws_in_drop_v2'
12 | '/alignment_summary.txt')
13 | s1.render('./src/seqc/summary/test_summary.html')
14 |
15 | if __name__ == "__main__":
16 | nose2.main()
17 |
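The hard-coded path in the test above points at machine-specific scratch output. A minimal sketch of the same Section API with generic, relative paths (illustrative file names, same call pattern as the test):

from seqc.summary import summary

# Illustrative only: any alignment_summary.txt produced by a SEQC run can be used here.
section = summary.Section.from_alignment_summary("alignment_summary.txt")
section.render("test_summary.html")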
--------------------------------------------------------------------------------
/src/seqc/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dpeerlab/seqc/d07836e430d56d2304e70bc042b483e8cbe22e00/src/seqc/tests/__init__.py
--------------------------------------------------------------------------------
/src/seqc/tests/test_args.py:
--------------------------------------------------------------------------------
1 | import nose2
2 | import unittest
3 |
4 | import seqc
5 | from seqc.core import main
6 |
7 |
8 | # class TestSEQC(unittest.TestCase):
9 | # def setUp(self):
10 | # pass
11 |
12 | # def tearDown(self):
13 | # pass
14 |
15 | # def test_args(self):
16 |
17 | # argv = ["start", "-k", "/Users/dchun/dpeerlab-chunj.pem", "-t", "t2.micro"]
18 |
19 | # self.assertRaises(ValueError, lambda: main.main(argv))
20 |
21 | # class MyUnitTest(unittest.TestCase):
22 | # def setUp(self):
23 | # pass
24 |
25 | # def tearDown(self):
26 | # pass
27 |
28 | # def test_args(self):
29 |
30 | # # argv = [
31 | # # "run", "ten_x_v2", "--local",
32 | # # "--index", "s3://seqc-public/genomes/hg38_chr19/",
33 | # # "--barcode-files", "s3://seqc-public/barcodes/ten_x_v2/flat/",
34 | # # "--genomic-fastq", "./test-data/genomic/",
35 | # # "--barcode-fastq", "./test-data/barcode/",
36 | # # "--output-prefix", "./test-data/seqc-results/",
37 | # # "--email", "jaeyoung.chun@gmail.com",
38 | # # "--star-args", "\"runRNGseed=0\""
39 | # # ]
40 |
41 | # argv = [
42 | # "run"
43 | # ]
44 |
45 | # try:
46 | # main.main(argv)
47 | # # self.assertRaises(BaseException, lambda: main.main(argv))
48 | # except:
49 | # pass
50 | # # self.assertRaises(ValueError, lambda: main.main(argv))
51 |
52 |
53 | # class TestSEQC(unittest.TestCase):
54 | # def setUp(self):
55 | # pass
56 |
57 | # def tearDown(self):
58 | # pass
59 |
60 | # def test_args(self):
61 |
62 | # from seqc.sequence import gtf
63 |
64 | # # remove any invalid ids from the annotation file
65 | # gr = gtf.Reader("./test-data/homo_sapiens.gtf.gz")
66 |
67 | # for line_fields in gr:
68 | # record = gtf.Record(line_fields)
69 | # print(record)
70 | # biotype = record.attribute("gene_biotype")
71 | # print(biotype)
72 |
73 | # # self.assertRaises(ValueError, lambda: main.main(argv))
74 |
75 |
76 | if __name__ == "__main__":
77 |
78 | unittest.main()
79 |
--------------------------------------------------------------------------------
/src/seqc/tests/test_dataset.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | TestDataset = namedtuple(
4 | "datasets",
5 | ["barcode_fastq", "genomic_fastq", "merged_fastq", "bam", "index", "barcodes",],
6 | )
7 |
8 | dataset_s3 = TestDataset(
9 | barcode_fastq="s3://seqc-public/test/%s/barcode/", # platform
10 | genomic_fastq="s3://seqc-public/test/%s/genomic/", # platform
11 | merged_fastq="s3://seqc-public/test/%s/%s_merged.fastq.gz", # platform, platform
12 | bam="s3://seqc-public/test/%s/Aligned.out.bam", # platform
13 | index="s3://seqc-public/genomes/hg38_chr19/",
14 | barcodes="s3://seqc-public/barcodes/%s/flat/", # platform
15 | )
16 |
17 | dataset_local = TestDataset(
18 | barcode_fastq="test-data/datasets/%s/barcode/", # platform
19 | genomic_fastq="test-data/datasets/%s/genomic/", # platform
20 | merged_fastq=None,
21 | bam="test-data/datasets/%s/Aligned.out.bam", # platform
22 | index="test-data/datasets/genomes/hg38_chr19/",
23 | barcodes="test-data/datasets/barcodes/%s/flat/", # platform
24 | )
25 |
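The `%s` placeholders above are filled in with the platform name (twice for the merged FASTQ) by the test modules. A small illustration of that substitution:

from test_dataset import dataset_s3

platform = "ten_x_v2"
# The tests build concrete paths by %-formatting the templates above:
assert dataset_s3.barcode_fastq % platform == "s3://seqc-public/test/ten_x_v2/barcode/"
assert dataset_s3.barcodes % platform == "s3://seqc-public/barcodes/ten_x_v2/flat/"
assert (
    dataset_s3.merged_fastq % (platform, platform)
    == "s3://seqc-public/test/ten_x_v2/ten_x_v2_merged.fastq.gz"
)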
--------------------------------------------------------------------------------
/src/seqc/tests/test_run_e2e_local.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import os
3 | import uuid
4 | import shutil
5 | import subprocess
6 | import re
7 | from nose2.tools import params
8 | from seqc.core import main
9 | from test_dataset import dataset_local, dataset_s3
10 |
11 |
12 | def get_output_file_list(test_id, test_folder):
13 |
14 | proc = subprocess.Popen(
15 | ["find", test_folder, "-type", "f"],
16 | stdout=subprocess.PIPE,
17 | stderr=subprocess.PIPE,
18 | )
19 | stdout, _ = proc.communicate()
20 | files = stdout.decode().splitlines()
21 |
22 | # extract only filenames (i.e. remove directory hierarchy)
23 | # convert to a set for easy comparison
24 | files = set(map(lambda filename: filename.replace(test_folder + "/", ""), files))
25 |
26 | return files
27 |
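For reference, a pure-Python sketch of what the `find` subprocess above collects (assuming the intent is simply every regular file under `test_folder`, with the folder prefix stripped):

import os

def get_output_file_list_walk(test_folder):
    # Same idea as `find <test_folder> -type f`, without a subprocess.
    files = set()
    for root, _, names in os.walk(test_folder):
        for name in names:
            path = os.path.join(root, name)
            files.add(path.replace(test_folder + "/", ""))
    return files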
28 |
29 | def expected_output_files(file_prefix):
30 |
31 | files = set(
32 | [
33 | f"{file_prefix}.h5",
34 | f"{file_prefix}_alignment_summary.txt",
35 | f"{file_prefix}_cell_filters.png",
36 | f"{file_prefix}_de_gene_list.txt",
37 | f"{file_prefix}_dense.csv",
38 | f"{file_prefix}_merged.fastq.gz",
39 | f"{file_prefix}_mini_summary.json",
40 | f"{file_prefix}_mini_summary.pdf",
41 | f"{file_prefix}_seqc_log.txt",
42 | f"{file_prefix}_sparse_counts_barcodes.csv",
43 | f"{file_prefix}_sparse_counts_genes.csv",
44 | f"{file_prefix}_sparse_molecule_counts.mtx",
45 | f"{file_prefix}_sparse_read_counts.mtx",
46 | f"{file_prefix}_summary.tar.gz",
47 | f"{file_prefix}_Aligned.out.bam",
48 | ]
49 | )
50 |
51 | return files
52 |
53 |
54 | class TestRunLocal(unittest.TestCase):
55 | @classmethod
56 | def setUp(cls):
57 | cls.test_id = str(uuid.uuid4())
58 | cls.path_temp = os.path.join(
59 | os.environ["TMPDIR"], "seqc-test", str(uuid.uuid4())
60 | )
61 | os.makedirs(cls.path_temp, exist_ok=True)
62 | with open("seqc_log.txt", "wt") as f:
63 | f.write("Dummy log.\n")
64 | f.write("nose2 captures input, so no log is produced.\n")
65 | f.write("This causes pipeline errors.\n")
66 |
67 | @classmethod
68 | def tearDown(cls):
69 | if os.path.isdir(cls.path_temp):
70 | shutil.rmtree(cls.path_temp, ignore_errors=True)
71 |
72 | def test_using_dataset_in_s3(self, platform="ten_x_v2"):
73 | # must NOT end with a slash
74 | file_prefix = "test"
75 | output_prefix = os.path.join(self.path_temp, file_prefix)
76 |
77 | params = [
78 | ("run", platform),
79 | ("--local",),
80 | ("--output-prefix", output_prefix),
81 | ("--index", dataset_s3.index),
82 | ("--barcode-files", dataset_s3.barcodes % platform),
83 | ("--barcode-fastq", dataset_s3.barcode_fastq % platform),
84 | ("--genomic-fastq", dataset_s3.genomic_fastq % platform),
85 | ("--star-args", "runRNGseed=0"),
86 | ]
87 |
88 | argv = [element for tupl in params for element in tupl]
89 |
90 | if platform != "drop_seq":
91 | argv += ["--barcode-files", dataset_s3.barcodes % platform]
92 |
93 | main.main(argv)
94 |
95 | # get output file list
96 | files = get_output_file_list(self.test_id, self.path_temp)
97 |
98 | # check if each expected file is found in the list of files generated
99 | for file in expected_output_files(file_prefix):
100 | self.assertIn(file, files)
101 |
102 | def test_using_local_dataset(self, platform="ten_x_v2"):
103 | # must NOT end with a slash
104 | file_prefix = "test"
105 | output_prefix = os.path.join(self.path_temp, file_prefix)
106 |
107 | params = [
108 | ("run", platform),
109 | ("--local",),
110 | ("--output-prefix", output_prefix),
111 | ("--index", dataset_local.index),
112 | ("--barcode-files", dataset_local.barcodes % platform),
113 | ("--barcode-fastq", dataset_local.barcode_fastq % platform),
114 | ("--genomic-fastq", dataset_local.genomic_fastq % platform),
115 | ("--star-args", "runRNGseed=0"),
116 | ]
117 |
118 | argv = [element for tupl in params for element in tupl]
119 |
120 | if platform != "drop_seq":
121 | argv += ["--barcode-files", dataset_local.barcodes % platform]
122 |
123 | main.main(argv)
124 |
125 | # get output file list
126 | files = get_output_file_list(self.test_id, self.path_temp)
127 |
128 | # check if each expected file is found in the list of files generated
129 | for file in expected_output_files(file_prefix):
130 | self.assertIn(file, files)
131 |
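Both tests above build their command line the same way: the `params` tuples are flattened into a single argv list for `main.main`. A tiny standalone illustration of that flattening (the output path is illustrative):

params = [
    ("run", "ten_x_v2"),
    ("--local",),
    ("--output-prefix", "/tmp/seqc-test/test"),
]
argv = [element for tupl in params for element in tupl]
assert argv == ["run", "ten_x_v2", "--local", "--output-prefix", "/tmp/seqc-test/test"]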
--------------------------------------------------------------------------------
/src/seqc/tests/test_run_e2e_remote.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import os
3 | import uuid
4 | import shutil
5 | import re
6 | from seqc.core import main
7 | from seqc import io
8 | import boto3
9 | from nose2.tools import params
10 | from test_dataset import dataset_s3
11 |
12 |
13 | def get_instance_by_test_id(test_id):
14 |
15 | ec2 = boto3.resource("ec2")
16 | instances = ec2.instances.filter(
17 | Filters=[{"Name": "tag:TestID", "Values": [test_id]}]
18 | )
19 | instances = list(instances)
20 |
21 | if len(instances) != 1:
22 | raise Exception("Test ID is not found or not unique!")
23 |
24 | return instances[0]
25 |
26 |
27 | def expected_output_files(output_prefix):
28 |
29 | files = set(
30 | [
31 | f"{output_prefix}.h5",
32 | f"{output_prefix}_Aligned.out.bam",
33 | f"{output_prefix}_alignment_summary.txt",
34 | f"{output_prefix}_cell_filters.png",
35 | f"{output_prefix}_de_gene_list.txt",
36 | f"{output_prefix}_dense.csv",
37 | f"{output_prefix}_merged.fastq.gz",
38 | f"{output_prefix}_mini_summary.json",
39 | f"{output_prefix}_mini_summary.pdf",
40 | f"{output_prefix}_seqc_log.txt",
41 | f"{output_prefix}_sparse_counts_barcodes.csv",
42 | f"{output_prefix}_sparse_counts_genes.csv",
43 | f"{output_prefix}_sparse_molecule_counts.mtx",
44 | f"{output_prefix}_sparse_read_counts.mtx",
45 | f"{output_prefix}_summary.tar.gz",
46 | f"seqc_log.txt",
47 | ]
48 | )
49 |
50 | return files
51 |
52 |
53 | def expected_output_files_run_from_merged(output_prefix):
54 |
55 | files = expected_output_files(output_prefix)
56 |
57 | excludes = set([f"{output_prefix}_merged.fastq.gz"])
58 |
59 | return files - excludes
60 |
61 |
62 | def expected_output_files_run_from_bam(output_prefix):
63 |
64 | files = expected_output_files(output_prefix)
65 |
66 | excludes = set(
67 | [
68 | f"{output_prefix}_Aligned.out.bam",
69 | f"{output_prefix}_alignment_summary.txt",
70 | f"{output_prefix}_merged.fastq.gz",
71 | ]
72 | )
73 |
74 | return files - excludes
75 |
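The helpers above express expectations as set differences: a run that starts from a BAM skips merging and alignment, so those artifacts are subtracted from the full set. For example:

full = expected_output_files("from-bamfile")
from_bam = expected_output_files_run_from_bam("from-bamfile")

assert "from-bamfile_merged.fastq.gz" in full
assert "from-bamfile_merged.fastq.gz" not in from_bam
assert "from-bamfile_Aligned.out.bam" not in from_bam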
76 |
77 | def get_output_file_list(test_id, s3_bucket, test_folder):
78 |
79 | # get instance and wait until terminated
80 | instance = get_instance_by_test_id(test_id)
81 | instance.wait_until_terminated()
82 |
83 | # check files generated in S3
84 | files = io.S3.listdir(s3_bucket, test_folder)
85 |
86 | # extract only filenames (i.e. remove directory hierarchy)
87 | # convert to a set for easy comparison
88 | files = set(map(lambda filename: filename.replace(test_folder, ""), files))
89 |
90 | return files
91 |
92 |
93 | def check_for_success_msg(s3_seqc_log_uri, path_temp):
94 |
95 | # download seqc_log.txt
96 | io.S3.download(
97 | link=s3_seqc_log_uri, prefix=path_temp, overwrite=True, recursive=False
98 | )
99 |
100 | # check if seqc_log.txt has a successful message
101 | with open(os.path.join(path_temp, "seqc_log.txt"), "rt") as fin:
102 | logs = fin.read()
103 | match = re.search(r"Execution completed successfully", logs, re.MULTILINE)
104 |
105 | return True if match else False
106 |
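A minimal illustration of the log check above, applied to an in-memory string rather than a log downloaded from S3:

import re

logs = "(earlier log lines)\nExecution completed successfully\n"
assert re.search(r"Execution completed successfully", logs, re.MULTILINE) is not None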
107 |
108 | class TestRunRemote(unittest.TestCase):
109 |
110 | email = os.environ["SEQC_TEST_EMAIL"]
111 | rsa_key = os.environ["SEQC_TEST_RSA_KEY"]
112 | ami_id = os.environ["SEQC_TEST_AMI_ID"]
113 |
114 | s3_bucket = "dp-lab-cicd"
115 |
116 | @classmethod
117 | def setUp(cls):
118 | cls.test_id = str(uuid.uuid4())
119 | cls.path_temp = os.path.join(
120 | os.environ["TMPDIR"], "seqc-test", str(uuid.uuid4())
121 | )
122 | os.makedirs(cls.path_temp, exist_ok=True)
123 |
124 | @classmethod
125 | def tearDown(cls):
126 | if os.path.isdir(cls.path_temp):
127 | shutil.rmtree(cls.path_temp, ignore_errors=True)
128 |
129 | @params("in_drop_v2", "ten_x_v2")
130 | def test_remote_from_raw_fastq(self, platform="ten_x_v2"):
131 | output_prefix = "from-raw-fastq"
132 | # must end with a slash
133 | test_folder = f"seqc/run-{platform}-{self.test_id}/"
134 |
135 | params = [
136 | ("run", platform),
137 | ("--output-prefix", "from-raw-fastq"),
138 | ("--upload-prefix", f"s3://{self.s3_bucket}/{test_folder}"),
139 | ("--index", dataset_s3.index),
140 | ("--email", self.email),
141 | ("--barcode-fastq", dataset_s3.barcode_fastq % platform),
142 | ("--genomic-fastq", dataset_s3.genomic_fastq % platform),
143 | ("--instance-type", "r5.2xlarge"),
144 | ("--spot-bid", "1.0"),
145 | ("--rsa-key", self.rsa_key),
146 | ("--debug",),
147 | ("--remote-update",),
148 | ("--ami-id", self.ami_id),
149 | ("--user-tags", f"TestID:{self.test_id}"),
150 | ]
151 |
152 | argv = [element for tupl in params for element in tupl]
153 |
154 | if platform != "drop_seq":
155 | argv += ["--barcode-files", dataset_s3.barcodes % platform]
156 |
157 | main.main(argv)
158 |
159 | # wait until terminated
160 | # get output file list
161 | files = get_output_file_list(self.test_id, self.s3_bucket, test_folder)
162 |
163 | # check for the exact same filenames
164 | self.assertSetEqual(files, expected_output_files(output_prefix))
165 |
166 | # check for success message in seqc_log.txt
167 | has_success_msg = check_for_success_msg(
168 | s3_seqc_log_uri="s3://{}/{}".format(
169 | self.s3_bucket, os.path.join(test_folder, "seqc_log.txt")
170 | ),
171 | path_temp=self.path_temp,
172 | )
173 |
174 | self.assertTrue(
175 | has_success_msg, msg="Unable to find the success message in the log"
176 | )
177 |
178 | def test_remote_from_merged(self, platform="in_drop_v2"):
179 | output_prefix = "from-merged"
180 | # must end with a slash
181 | test_folder = f"seqc/run-{platform}-{self.test_id}/"
182 |
183 | params = [
184 | ("run", platform),
185 | ("--output-prefix", output_prefix),
186 | ("--upload-prefix", f"s3://{self.s3_bucket}/{test_folder}"),
187 | ("--index", dataset_s3.index),
188 | ("--email", self.email),
189 | ("--merged-fastq", dataset_s3.merged_fastq % (platform, platform)),
190 | ("--rsa-key", self.rsa_key),
191 | ("--instance-type", "r5.2xlarge"),
192 | ("--ami-id", self.ami_id),
193 | ("--remote-update",),
194 | ("--user-tags", f"TestID:{self.test_id}")
195 | # ('--spot-bid', '1.0')
196 | ]
197 |
198 | argv = [element for tupl in params for element in tupl]
199 |
200 | if platform != "drop_seq":
201 | argv += ["--barcode-files", dataset_s3.barcodes % platform]
202 |
203 | main.main(argv)
204 |
205 | # wait until terminated
206 | # get output file list
207 | files = get_output_file_list(self.test_id, self.s3_bucket, test_folder)
208 |
209 | # check for the exact same filenames
210 | self.assertSetEqual(files, expected_output_files_run_from_merged(output_prefix))
211 |
212 | # check for success message in seqc_log.txt
213 | has_success_msg = check_for_success_msg(
214 | s3_seqc_log_uri="s3://{}/{}".format(
215 | self.s3_bucket, os.path.join(test_folder, "seqc_log.txt")
216 | ),
217 | path_temp=self.path_temp,
218 | )
219 |
220 | self.assertTrue(
221 | has_success_msg, msg="Unable to find the success message in the log"
222 | )
223 |
224 | def test_remote_from_bamfile(self, platform="in_drop_v2"):
225 | output_prefix = "from-bamfile"
226 | # must end with a slash
227 | test_folder = f"seqc/run-{platform}-{self.test_id}/"
228 |
229 | params = [
230 | ("run", platform),
231 | ("--output-prefix", output_prefix),
232 | ("--upload-prefix", f"s3://{self.s3_bucket}/{test_folder}"),
233 | ("--index", dataset_s3.index),
234 | ("--email", self.email),
235 | ("--alignment-file", dataset_s3.bam % platform),
236 | ("--rsa-key", self.rsa_key),
237 | ("--instance-type", "r5.2xlarge"),
238 | ("--debug",),
239 | ("--ami-id", self.ami_id),
240 | ("--remote-update",),
241 | ("--user-tags", f"TestID:{self.test_id}")
242 | # ('--spot-bid', '1.0')
243 | ]
244 |
245 | argv = [element for tupl in params for element in tupl]
246 |
247 | if platform != "drop_seq":
248 | argv += ["--barcode-files", dataset_s3.barcodes % platform]
249 |
250 | main.main(argv)
251 |
252 | # wait until terminated
253 | # get output file list
254 | files = get_output_file_list(self.test_id, self.s3_bucket, test_folder)
255 |
256 | # check for the exact same filenames
257 | self.assertSetEqual(files, expected_output_files_run_from_bam(output_prefix))
258 |
259 | # check for success message in seqc_log.txt
260 | has_success_msg = check_for_success_msg(
261 | s3_seqc_log_uri="s3://{}/{}".format(
262 | self.s3_bucket, os.path.join(test_folder, "seqc_log.txt")
263 | ),
264 | path_temp=self.path_temp,
265 | )
266 |
267 | self.assertTrue(
268 | has_success_msg, msg="Unable to find the success message in the log"
269 | )
270 |
--------------------------------------------------------------------------------
/src/seqc/tests/test_run_gtf.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase, mock
2 | import os
3 | import uuid
4 | import shutil
5 | import nose2
6 | from seqc.sequence import gtf
7 | from test_dataset import dataset_local
8 |
9 |
10 | class TestGtf(TestCase):
11 | @classmethod
12 | def setUp(cls):
13 | cls.test_id = str(uuid.uuid4())
14 | cls.path_temp = os.path.join(
15 | os.environ["TMPDIR"], "seqc-test", str(uuid.uuid4())
16 | )
17 | cls.annotation = os.path.join(dataset_local.index, "annotations.gtf")
18 |
19 | @classmethod
20 | def tearDown(cls):
21 | if os.path.isdir(cls.path_temp):
22 | shutil.rmtree(cls.path_temp, ignore_errors=True)
23 |
24 | def test_construct_translator(self):
25 | translator = gtf.GeneIntervals(self.annotation)
26 | self.assertIsNotNone(translator)
27 |
28 | def test_num_of_transcripts(self):
29 | rd = gtf.Reader(self.annotation)
30 | num_transcripts = sum(1 for _ in rd.iter_transcripts())
31 | # awk -F'\t' '$3=="transcript" { print $0 }' annotations.gtf | wc -l
32 | self.assertEqual(num_transcripts, 12747)
33 |
34 | def test_iter_transcripts(self):
35 | rd = gtf.Reader(self.annotation)
36 | (transcript_chromosome, transcript_strand, transcript_gene_id), exons = next(
37 | rd.iter_transcripts()
38 | )
39 |
40 | # this should give the 3 exons of the first transcript of the first gene, in reverse order:
41 | #
42 | # chr19 HAVANA gene 60951 71626 . - . gene_id "ENSG00000282458.1"; gene_type "transcribed_processed_pseudogene"; gene_status "KNOWN"; gene_name "WASH5P"; level 2; havana_gene "OTTHUMG00000180466.8";
43 | # chr19 HAVANA transcript 60951 70976 . - . gene_id "ENSG00000282458.1"; transcript_id "ENST00000632506.1"; gene_type "transcribed_processed_pseudogene"; gene_status "KNOWN"; gene_name "WASH5P"; transcript_type "processed_transcript"; transcript_status "KNOWN"; transcript_name "WASH5P-008"; level 2; tag "basic"; transcript_support_level "1"; havana_gene "OTTHUMG00000180466.8"; havana_transcript "OTTHUMT00000471217.2";
44 | # chr19 HAVANA exon 70928 70976 . - . gene_id "ENSG00000282458.1"; transcript_id "ENST00000632506.1"; gene_type "transcribed_processed_pseudogene"; gene_status "KNOWN"; gene_name "WASH5P"; transcript_type "processed_transcript"; transcript_status "KNOWN"; transcript_name "WASH5P-008"; exon_number 1; exon_id "ENSE00003781173.1"; level 2; tag "basic"; transcript_support_level "1"; havana_gene "OTTHUMG00000180466.8"; havana_transcript "OTTHUMT00000471217.2";
45 | # chr19 HAVANA exon 66346 66499 . - . gene_id "ENSG00000282458.1"; transcript_id "ENST00000632506.1"; gene_type "transcribed_processed_pseudogene"; gene_status "KNOWN"; gene_name "WASH5P"; transcript_type "processed_transcript"; transcript_status "KNOWN"; transcript_name "WASH5P-008"; exon_number 2; exon_id "ENSE00003783498.1"; level 2; tag "basic"; transcript_support_level "1"; havana_gene "OTTHUMG00000180466.8"; havana_transcript "OTTHUMT00000471217.2";
46 | # chr19 HAVANA exon 60951 61894 . - . gene_id "ENSG00000282458.1"; transcript_id "ENST00000632506.1"; gene_type "transcribed_processed_pseudogene"; gene_status "KNOWN"; gene_name "WASH5P"; transcript_type "processed_transcript"; transcript_status "KNOWN"; transcript_name "WASH5P-008"; exon_number 3; exon_id "ENSE00003783010.1"; level 2; tag "basic"; transcript_support_level "1"; havana_gene "OTTHUMG00000180466.8"; havana_transcript "OTTHUMT00000471217.2";
47 |
48 | self.assertEqual(transcript_chromosome, "chr19")
49 | self.assertEqual(transcript_strand, "-")
50 | self.assertEqual(transcript_gene_id, 282458)
51 | self.assertEqual(len(exons), 3)
52 |
53 | # the GTF attributes field (index 8) contains the exon ID
54 | self.assertIn("ENSE00003783010.1", exons[0][8]) # exon number 3
55 | self.assertIn("ENSE00003783498.1", exons[1][8]) # exon number 2
56 | self.assertIn("ENSE00003781173.1", exons[2][8]) # exon number 1
57 |
58 | def test_translate(self):
59 | translator = gtf.GeneIntervals(self.annotation)
60 | # chr19 HAVANA gene 60951 71626 . - . gene_id "ENSG00000282458.1"; gene_type "transcribed_processed_pseudogene"; gene_status "KNOWN"; gene_name "WASH5P"; level 2; havana_gene "OTTHUMG00000180466.8";
61 | gene_id = translator.translate("chr19", "-", 60951)
62 | self.assertEqual(gene_id, 282458)
63 |
64 |
65 | if __name__ == "__main__":
66 | nose2.main()
67 |
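As the expected values in these tests suggest, the integer gene id is the numeric portion of the Ensembl gene id in the GTF (ENSG00000282458.1 -> 282458). A sketch of that relationship (illustrative parsing only, not necessarily how gtf.py implements it):

ensembl_gene_id = "ENSG00000282458.1"
numeric_gene_id = int(ensembl_gene_id.split(".")[0][len("ENSG"):])
assert numeric_gene_id == 282458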
--------------------------------------------------------------------------------
/src/seqc/tests/test_run_readarray.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase, mock
2 | import os
3 | import uuid
4 | import shutil
5 | import nose2
6 | from test_dataset import dataset_local
7 | from seqc.sequence.encodings import DNA3Bit
8 | from seqc.read_array import ReadArray
9 | from seqc.sequence import gtf
10 |
11 |
12 | class TestReadArray(TestCase):
13 | @classmethod
14 | def setUp(cls):
15 | cls.test_id = str(uuid.uuid4())
16 | cls.path_temp = os.path.join(
17 | os.environ["TMPDIR"], "seqc-test", str(uuid.uuid4())
18 | )
19 | cls.annotation = os.path.join(dataset_local.index, "annotations.gtf")
20 | cls.translator = gtf.GeneIntervals(cls.annotation, 10000)
21 |
22 | @classmethod
23 | def tearDown(cls):
24 | if os.path.isdir(cls.path_temp):
25 | shutil.rmtree(cls.path_temp, ignore_errors=True)
26 |
27 | def test_read_array_creation(self, platform="ten_x_v2"):
28 | ra, _ = ReadArray.from_alignment_file(
29 | dataset_local.bam % platform, self.translator, required_poly_t=0
30 | )
31 | self.assertIsNotNone(ra)
32 |
33 | def test_read_array_rmt_decode_10x_v2(self):
34 | platform = "ten_x_v2"
35 |
36 | # create a readarray
37 | ra, _ = ReadArray.from_alignment_file(
38 | dataset_local.bam % platform, self.translator, required_poly_t=0
39 | )
40 |
41 | # see if we can decode numeric UMI back to nucleotide sequence
42 | dna3bit = DNA3Bit()
43 | for rmt in ra.data["rmt"]:
44 | decoded = dna3bit.decode(rmt).decode()
45 | # ten_x_v2 UMI length = 10 nt
46 | self.assertEqual(len(decoded), 10)
47 |
48 | def test_read_array_rmt_decode_10x_v3(self):
49 | platform = "ten_x_v3"
50 |
51 | # create a readarray
52 | ra, _ = ReadArray.from_alignment_file(
53 | dataset_local.bam % platform, self.translator, required_poly_t=0
54 | )
55 |
56 | # see if we can decode numeric UMI back to nucleotide sequence
57 | dna3bit = DNA3Bit()
58 | for rmt in ra.data["rmt"]:
59 | decoded = dna3bit.decode(rmt).decode()
60 | # ten_x_v3 UMI length = 12 nt
61 | self.assertEqual(len(decoded), 12)
62 |
63 |
64 | if __name__ == "__main__":
65 | nose2.main()
66 |
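A hedged sketch of the decode checks above written as a round trip; it assumes DNA3Bit also exposes an encode() counterpart to decode(), which is not shown in this file:

from seqc.sequence.encodings import DNA3Bit

dna3bit = DNA3Bit()
umi = b"ACGTACGTAC"  # a ten_x_v2-length (10 nt) UMI, chosen for illustration
encoded = dna3bit.encode(umi)  # assumption: encode() exists and accepts bytes
assert dna3bit.decode(encoded).decode() == "ACGTACGTAC"
assert len(dna3bit.decode(encoded).decode()) == 10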
--------------------------------------------------------------------------------
/src/seqc/tests/test_run_rmt_correction.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase, mock
2 | import nose2
3 | import os
4 | import numpy as np
5 | from seqc.read_array import ReadArray
6 | from seqc import rmt_correction
7 |
8 |
9 | class TestRmtCorrection(TestCase):
10 | @classmethod
11 | def setUp(self):
12 | # pre-allocate arrays
13 | n_barcodes = 183416337
14 | data = np.recarray((n_barcodes,), ReadArray._dtype)
15 | genes = np.zeros(n_barcodes, dtype=np.int32)
16 | positions = np.zeros(n_barcodes, dtype=np.int32)
17 | self.ra = ReadArray(data, genes, positions)
18 |
19 | @classmethod
20 | def tearDown(self):
21 | pass
22 |
23 | def test_should_return_correct_ra_size(self):
24 |
25 | ra_size = self.ra.data.nbytes + self.ra.genes.nbytes + self.ra.positions.nbytes
26 |
27 | self.assertEqual(4768824762, ra_size)
28 |
29 | # 50 GiB of available memory
30 | @mock.patch(
31 | "seqc.rmt_correction._get_available_memory", return_value=50 * 1024 ** 3
32 | )
33 | def test_should_return_correct_max_workers(self, mock_mem):
34 |
35 | n_workers = rmt_correction._calc_max_workers(self.ra)
36 |
37 | self.assertEqual(n_workers, 5)
38 |
39 | # 1TB
40 | @mock.patch("seqc.rmt_correction._get_available_memory", return_value=1079354630144)
41 | def test_should_return_correct_max_workers2(self, mock_mem):
42 |
43 | n_workers = rmt_correction._calc_max_workers(self.ra)
44 |
45 | self.assertEqual(n_workers, 119)
46 |
47 | # available memory smaller than the ReadArray itself
48 | @mock.patch("seqc.rmt_correction._get_available_memory")
49 | def test_should_return_one_if_ra_larger_than_mem(self, mock_mem):
50 |
51 | ra_size = self.ra.data.nbytes + self.ra.genes.nbytes + self.ra.positions.nbytes
52 |
53 | # assume the available memory is half the ReadArray size
54 | mock_mem.return_value = ra_size // 2
55 |
56 | n_workers = rmt_correction._calc_max_workers(self.ra)
57 |
58 | self.assertEqual(n_workers, 1)
59 |
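The byte count asserted in test_should_return_correct_ra_size follows from the array shapes: genes and positions are int32 (4 bytes each), and the remaining bytes imply an 18-byte record in ReadArray._dtype (derived from the asserted total, not stated explicitly in this file):

n_barcodes = 183416337
record_bytes = 18   # (4768824762 - 2 * 4 * n_barcodes) / n_barcodes
int32_bytes = 4     # genes and positions arrays
assert n_barcodes * (record_bytes + 2 * int32_bytes) == 4768824762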
60 |
61 | class TestRmtCorrection2(TestCase):
62 | @classmethod
63 | def setUp(self):
64 | # pre-allocate arrays
65 | n_barcodes = 183416337
66 | data = np.recarray((n_barcodes,), ReadArray._dtype)
67 | genes = np.zeros(n_barcodes, dtype=np.int32)
68 | positions = np.zeros(n_barcodes, dtype=np.int32)
69 | self.ra = ReadArray(data, genes, positions)
70 |
71 | import pickle
72 |
73 | with open("pre-correction-ra.pickle", "wb") as fout:
74 | pickle.dump(self.ra, fout)
75 |
76 | @classmethod
77 | def tearDown(self):
78 | import os
79 |
80 | try:
81 | os.remove("pre-correction-ra.pickle")
82 | except OSError:
83 | pass
84 |
85 | @mock.patch("seqc.rmt_correction._correct_errors_by_cell_group", return_value=0)
86 | def test_correct_errors_by_chunks(self, mock_correct):
87 | cell_group = [1, 2, 3]
88 | x = rmt_correction._correct_errors_by_cell_group_chunks(
89 | self.ra, cell_group, 0.02, 0.05
90 | )
91 | mock_correct.assert_called()
92 | self.assertEqual(len(cell_group), mock_correct.call_count)
93 | self.assertEqual([0, 0, 0], x)
94 |
95 |
96 | if __name__ == "__main__":
97 | nose2.main()
98 |
--------------------------------------------------------------------------------
/src/seqc/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.11"
2 |
--------------------------------------------------------------------------------