├── tests ├── __init__.py ├── fixtures │ └── alternative.ped ├── test_parser.py ├── test_standard_trio_missing_father.py ├── test_standard_trio_missing_column_son.py ├── test_standard_trio_wrong_gender_parents.py ├── test_standard_trio_extra_column_son.py ├── test_standard_trio.py ├── test_standard_trio_extra_daughter.py ├── test_individual.py └── test_family.py ├── MANIFEST.in ├── .gitignore ├── examples ├── my_family.ped ├── madeline_test.txt ├── multi_family.ped └── one_ind.txt ├── .travis.yml ├── setup.cfg ├── ped_parser ├── __init__.py ├── log.py ├── exceptions.py ├── individual.py ├── family.py └── parser.py ├── LICENSE.txt ├── setup.py ├── scripts └── ped_parser └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include examples/* -------------------------------------------------------------------------------- /tests/fixtures/alternative.ped: -------------------------------------------------------------------------------- 1 | #FamilyID SampleID Father Mother Sex Phenotype Capture_kit 2 | family_id sample_id 0 0 1 2 Agilent_SureSelect.V5 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /data 2 | /res 3 | /doc 4 | /dist 5 | /build 6 | /pickles 7 | *.pyc 8 | .DS_Store 9 | *.egg-info 10 | *.tmproj 11 | *.ipynb 12 | *.tmp 13 | htmlcov/ 14 | .coverage 15 | -------------------------------------------------------------------------------- /examples/my_family.ped: -------------------------------------------------------------------------------- 1 | #FamilyID SampleID Father Mother Sex Phenotype 2 | 1 proband father mother 1 2 3 | 1 mother 0 0 2 1 4 | 1 father 0 0 1 1 5 | 1 daughter father 
mother 1 2 6 | -------------------------------------------------------------------------------- /examples/madeline_test.txt: -------------------------------------------------------------------------------- 1 | #FamilyID SampleID Father Mother Sex Phenotype Proband Consultand Alive 2 | 1 proband father mother 1 2 Yes Yes Yes 3 | 1 mother 0 0 2 1 No Yes Yes 4 | 1 father 0 0 1 1 No Yes No 5 | 1 daughter father mother 1 2 No No Yes 6 | -------------------------------------------------------------------------------- /examples/multi_family.ped: -------------------------------------------------------------------------------- 1 | #FamilyID SampleID Father Mother Sex Phenotype 2 | 1 proband father mother 1 2 3 | 1 mother 0 0 2 1 4 | 1 father 0 0 1 1 5 | 1 daughter father mother 1 2 6 | 2 proband_2 father_2 mother_2 1 2 7 | 2 mother_2 0 0 2 1 8 | 2 father_2 0 0 1 1 9 | -------------------------------------------------------------------------------- /examples/one_ind.txt: -------------------------------------------------------------------------------- 1 | #FamilyID SampleID Father Mother Sex Phenotype CMMSID Tissue_origin Isolation_kit Isolation_date Isolation_personnel Medical_doctor Inheritance_model Phenotype_terms CMMS_seqID SciLifeID Capture_kit 2 | 109 109-1-1A 0 0 1 2 11-8280 Fibroblasts Qiagen midi 110621 AMS Naess AR Na Klinisk IEM Agilent_SureSelect.V5 -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # config file for automatic testing at travis-ci.org 2 | language: python 3 | 4 | python: 5 | - "2.7" 6 | 7 | install: 8 | # install test dependencies and package 9 | - pip install pytest 10 | - pip install . 
11 | 12 | script: 13 | - py.test 14 | 15 | notifications: 16 | email: no 17 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # for a pure Python package that supports Python 2 and 3 2 | [wheel] 3 | universal=1 4 | 5 | # make pypi render markdown files 6 | [metadata] 7 | description-file = README.md 8 | 9 | [pytest] 10 | flakes-ignore = 11 | __init__.py UnusedImport 12 | _compat.py UnusedImport 13 | tests/* ALL 14 | build/* ALL 15 | -------------------------------------------------------------------------------- /ped_parser/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | 4 | import logging 5 | 6 | logger = logging.getLogger(__name__) 7 | __version__ = '1.6.3' 8 | 9 | from ped_parser.individual import Individual 10 | from ped_parser.family import Family 11 | from ped_parser.parser import FamilyParser 12 | from ped_parser.log import init_log 13 | 14 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import codecs 3 | 4 | from ped_parser import parser 5 | 6 | 7 | def test_alternative_parser(): 8 | """Test parsing a ped file with alternative formatting.""" 9 | # test default 10 | with codecs.open('tests/fixtures/alternative.ped', 'r') as handle: 11 | family_parser = parser.FamilyParser(handle, family_type='alt') 12 | 13 | # we've only loaded one family 14 | ped = family_parser.families.values()[0] 15 | 16 | assert ped.family_id == 'family_id' 17 | assert len(ped.individuals) == 1 18 | 19 | sample = ped.individuals.values()[0] 20 | assert sample.extra_info['Capture_kit'] == 'Agilent_SureSelect.V5' 21 | 22 | # TODO: test with optional CMMS check 23 | 
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Moonso Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import os 3 | from setuptools import setup, find_packages 4 | import sys 5 | 6 | # Shortcut for building/publishing to Pypi 7 | if sys.argv[-1] == 'publish': 8 | os.system('python setup.py sdist bdist_wheel upload') 9 | sys.exit() 10 | 11 | 12 | setup( 13 | name="ped_parser", 14 | version="1.6.5", 15 | description="A ped file parser.", 16 | author="Mans Magnusson", 17 | author_email="mans.magnusson@scilifelab.se", 18 | url='https://github.com/moonso/ped_parser', 19 | license='MIT License', 20 | install_requires=[ 21 | 'pytest', 22 | 'click' 23 | ], 24 | packages=[ 25 | 'ped_parser' 26 | ], 27 | scripts=[ 28 | 'scripts/ped_parser' 29 | ], 30 | classifiers = [ 31 | "Programming Language :: Python", 32 | "Programming Language :: Python :: 3", 33 | "Development Status :: 4 - Beta", 34 | "License :: OSI Approved :: BSD License", 35 | "Operating System :: MacOS :: MacOS X", 36 | "Intended Audience :: Science/Research", 37 | ], 38 | long_description = "Parse pedigree files in different formats", 39 | ) -------------------------------------------------------------------------------- /tests/test_standard_trio_missing_father.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | test_standard_trio_missing_father.py 5 | 6 | Test the family parser with a missing father. 7 | 8 | FAM file looks like: 9 | 10 | #Standard trio 11 | #FamilyID SampleID Father Mother Sex Phenotype 12 | healthyParentsAffectedSon proband father mother 1 2 13 | healthyParentsAffectedSon mother 0 0 2 1 14 | 15 | Should raise exception since father is not in family. 16 | 17 | Created by Måns Magnusson on 2014-05-08. 18 | Copyright (c) 2014 __MyCompanyName__. All rights reserved. 
19 | """ 20 | 21 | import sys 22 | import os 23 | from tempfile import NamedTemporaryFile 24 | import pytest 25 | from ped_parser import parser 26 | from ped_parser.exceptions import PedigreeError 27 | 28 | 29 | class TestTrio(object): 30 | """Test class for testing how the individual class behave""" 31 | 32 | def setup_class(self): 33 | """Setup a standard trio with extra column in the 'proband' row.""" 34 | trio_lines = ['#Standard trio\n', 35 | '#FamilyID\tSampleID\tFather\tMother\tSex\tPhenotype\n', 36 | 'healthyParentsAffectedSon\tproband\tfather\tmother\t1\t2\n', 37 | 'healthyParentsAffectedSon\tmother\t0\t0\t2\t1\n', 38 | ] 39 | self.trio_file = NamedTemporaryFile(mode='w+t', delete=False, suffix='.vcf') 40 | self.trio_file.writelines(trio_lines) 41 | self.trio_file.seek(0) 42 | self.trio_file.close() 43 | 44 | 45 | def test_standard_trio_missing_father(self): 46 | """Test if the file is parsed in a correct way.""" 47 | with pytest.raises(PedigreeError): 48 | family_parser = parser.FamilyParser(open(self.trio_file.name, 'r')) 49 | 50 | 51 | def main(): 52 | pass 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | 58 | -------------------------------------------------------------------------------- /ped_parser/log.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | 5 | def init_log(logger, filename=None, loglevel=None): 6 | """ 7 | Initializes the log file in the proper format. 8 | 9 | Arguments: 10 | 11 | filename (str): Path to a file. Or None if logging is to 12 | be disabled. 13 | loglevel (str): Determines the level of the log output. 
14 | """ 15 | 16 | formatter = logging.Formatter( 17 | '[%(asctime)s] %(levelname)s: %(name)s: %(message)s' 18 | ) 19 | 20 | if loglevel: 21 | logger.setLevel(getattr(logging, loglevel)) 22 | 23 | # We will allways print warnings and higher to stderr 24 | ch = logging.StreamHandler() 25 | ch.setLevel('WARNING') 26 | ch.setFormatter(formatter) 27 | 28 | if filename: 29 | fi = logging.FileHandler(filename, encoding='utf-8') 30 | if loglevel: 31 | fi.setLevel(getattr(logging, loglevel)) 32 | fi.setFormatter(formatter) 33 | logger.addHandler(fi) 34 | # If no logfile is provided we print all log messages that the user has 35 | # defined to stderr 36 | else: 37 | if loglevel: 38 | ch.setLevel(getattr(logging, loglevel)) 39 | 40 | logger.addHandler(ch) 41 | 42 | 43 | def get_log_stream(logger): 44 | """ 45 | Returns a log stream. 46 | If there is a file handler this stream will be used. 47 | If there is no logfile return the stderr log stream 48 | 49 | Returns: 50 | A stream to the root log file or stderr stream. 51 | """ 52 | 53 | file_stream = None 54 | log_stream = None 55 | for handler in logger.handlers: 56 | if isinstance(handler, logging.FileHandler): 57 | file_stream = handler.stream 58 | else: 59 | log_stream = handler.stream 60 | 61 | if file_stream: 62 | return file_stream 63 | 64 | return log_stream -------------------------------------------------------------------------------- /tests/test_standard_trio_missing_column_son.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | test_standard_trio_missing_col_for_son.py 5 | 6 | Test the family parser when a column is missing. 7 | 8 | #Standard trio 9 | #FamilyID SampleID Father Mother Sex Phenotype 10 | healthyParentsAffectedSon proband father mother 1 11 | healthyParentsAffectedSon mother 0 0 2 1 12 | healthyParentsAffectedSon father 0 0 1 1 13 | 14 | Should raise exception since column is missing. 
15 | 16 | Created by Måns Magnusson on 2014-05-08. 17 | Copyright (c) 2014 __MyCompanyName__. All rights reserved. 18 | """ 19 | 20 | import sys 21 | import os 22 | from tempfile import NamedTemporaryFile 23 | import pytest 24 | from ped_parser import parser 25 | from ped_parser.exceptions import WrongLineFormat 26 | 27 | 28 | class TestTrio(object): 29 | """Test class for testing how the individual class behave""" 30 | 31 | def setup_class(self): 32 | """Setup a standard trio with extra column in the 'proband' row.""" 33 | trio_lines = ['#Standard trio\n', 34 | '#FamilyID\tSampleID\tFather\tMother\tSex\tPhenotype\n', 35 | 'healthyParentsAffectedSon\tproband\tfather\tmother\t1\n', 36 | 'healthyParentsAffectedSon\tmother\t0\t0\t2\t1\n', 37 | 'healthyParentsAffectedSon\tfather\t0\t0\t1\t1\n' 38 | ] 39 | self.trio_file = NamedTemporaryFile(mode='w+t', delete=False, suffix='.vcf') 40 | self.trio_file.writelines(trio_lines) 41 | self.trio_file.seek(0) 42 | self.trio_file.close() 43 | 44 | 45 | def test_standard_trio_proband_missing_column(self): 46 | """Test if the file is parsed in a correct way.""" 47 | with pytest.raises(WrongLineFormat): 48 | family_parser = parser.FamilyParser(open(self.trio_file.name, 'r')) 49 | 50 | 51 | def main(): 52 | pass 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | 58 | -------------------------------------------------------------------------------- /tests/test_standard_trio_wrong_gender_parents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | test_standard_trio_wrong_gender_parents.py 5 | 6 | Test the family parser when a column is missing. 7 | 8 | #Standard trio 9 | #FamilyID SampleID Father Mother Sex Phenotype 10 | healthyParentsAffectedSon proband father mother 1 2 11 | healthyParentsAffectedSon mother 0 0 1 1 12 | healthyParentsAffectedSon father 0 0 2 1 13 | 14 | Should raise exception since parents have the wrong genders. 
15 | 16 | Created by Måns Magnusson on 2014-05-08. 17 | Copyright (c) 2014 __MyCompanyName__. All rights reserved. 18 | """ 19 | 20 | import sys 21 | import os 22 | from tempfile import NamedTemporaryFile 23 | import pytest 24 | from ped_parser import parser 25 | from ped_parser.exceptions import PedigreeError 26 | 27 | class TestTrio(object): 28 | """Test class for testing how the individual class behave""" 29 | 30 | def setup_class(self): 31 | """Setup a standard trio with extra column in the 'proband' row.""" 32 | trio_lines = ['#Standard trio\n', 33 | '#FamilyID\tSampleID\tFather\tMother\tSex\tPhenotype\n', 34 | 'healthyParentsAffectedSon\tproband\tfather\tmother\t1\t2\n', 35 | 'healthyParentsAffectedSon\tmother\t0\t0\t1\t1\n', 36 | 'healthyParentsAffectedSon\tfather\t0\t0\t2\t1\n' 37 | ] 38 | self.trio_file = NamedTemporaryFile(mode='w+t', delete=False, suffix='.vcf') 39 | self.trio_file.writelines(trio_lines) 40 | self.trio_file.seek(0) 41 | self.trio_file.close() 42 | 43 | 44 | def test_standard_trio_proband_missing_column(self): 45 | """Test if the file is parsed in a correct way.""" 46 | with pytest.raises(PedigreeError): 47 | family_parser = parser.FamilyParser(open(self.trio_file.name, 'r')) 48 | 49 | 50 | def main(): 51 | pass 52 | 53 | 54 | if __name__ == '__main__': 55 | main() 56 | 57 | -------------------------------------------------------------------------------- /tests/test_standard_trio_extra_column_son.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | test_standard_trio_extra_col_for_son.py 5 | 6 | Test the family parser when an extra column is missing. 
7 | 8 | FAM file looks like: 9 | 10 | #Standard trio 11 | #FamilyID SampleID Father Mother Sex Phenotype 12 | healthyParentsAffectedSon proband father mother 0 1 2 13 | healthyParentsAffectedSon mother 0 0 2 1 14 | healthyParentsAffectedSon father 0 0 1 1 15 | 16 | Should raise exception since there is an extra columnn in the proband line. 17 | 18 | Created by Måns Magnusson on 2014-05-08. 19 | Copyright (c) 2014 __MyCompanyName__. All rights reserved. 20 | """ 21 | 22 | import sys 23 | import os 24 | from tempfile import NamedTemporaryFile 25 | import pytest 26 | from ped_parser import parser 27 | from ped_parser.exceptions import WrongLineFormat 28 | 29 | 30 | class TestTrio(object): 31 | """Test class for testing how the individual class behave""" 32 | 33 | def setup_class(self): 34 | """Setup a standard trio with extra column in the 'proband' row.""" 35 | trio_lines = ['#Standard trio\n', 36 | '#FamilyID\tSampleID\tFather\tMother\tSex\tPhenotype\n', 37 | 'healthyParentsAffectedSon\tproband\tfather\tmother\t0\t1\t2\n', 38 | 'healthyParentsAffectedSon\tmother\t0\t0\t2\t1\n', 39 | 'healthyParentsAffectedSon\tfather\t0\t0\t1\t1\n' 40 | ] 41 | self.trio_file = NamedTemporaryFile(mode='w+t', delete=False, suffix='.vcf') 42 | self.trio_file.writelines(trio_lines) 43 | self.trio_file.seek(0) 44 | self.trio_file.close() 45 | 46 | 47 | def test_standard_trio_proband_extra_column(self): 48 | """Test if the file is parsed in a correct way.""" 49 | with pytest.raises(WrongLineFormat): 50 | family_parser = parser.FamilyParser(open(self.trio_file.name)) 51 | 52 | 53 | def main(): 54 | pass 55 | 56 | 57 | if __name__ == '__main__': 58 | main() 59 | 60 | -------------------------------------------------------------------------------- /tests/test_standard_trio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | test_standard_trio.py 5 | 6 | Test the family parser when everything is 
correct. 7 | 8 | #Standard trio 9 | #FamilyID SampleID Father Mother Sex Phenotype 10 | healthyParentsAffectedSon proband father mother 1 2 11 | healthyParentsAffectedSon mother 0 0 2 1 12 | healthyParentsAffectedSon father 0 0 1 1 13 | 14 | Should run through smoothely... 15 | 16 | Created by Måns Magnusson on 2014-05-08. 17 | Copyright (c) 2014 __MyCompanyName__. All rights reserved. 18 | """ 19 | 20 | import sys 21 | import os 22 | from tempfile import NamedTemporaryFile 23 | from ped_parser import parser 24 | 25 | 26 | class TestTrio(object): 27 | """Test class for testing how the individual class behave""" 28 | 29 | def setup_class(self): 30 | """Setup a standard trio.""" 31 | trio_lines = ['#Standard trio\n', 32 | '#FamilyID\tSampleID\tFather\tMother\tSex\tPhenotype\n', 33 | 'healthyParentsAffectedSon\tproband\tfather\tmother\t1\t2\n', 34 | 'healthyParentsAffectedSon\tmother\t0\t0\t2\t1\n', 35 | 'healthyParentsAffectedSon\tfather\t0\t0\t1\t1\n' 36 | ] 37 | self.trio_file = NamedTemporaryFile(mode='w+t', delete=False, suffix='.vcf') 38 | self.trio_file.writelines(trio_lines) 39 | self.trio_file.seek(0) 40 | self.trio_file.close() 41 | 42 | 43 | def test_standard_trio(self): 44 | """Test if the file is parsed in a correct way.""" 45 | family_parser = parser.FamilyParser(open(self.trio_file.name, 'r')) 46 | assert family_parser.header == [ 47 | 'family_id', 48 | 'sample_id', 49 | 'father_id', 50 | 'mother_id', 51 | 'sex', 52 | 'phenotype' 53 | ] 54 | assert 'healthyParentsAffectedSon' in family_parser.families 55 | assert set(['proband', 'mother', 'father']) == set(family_parser.families['healthyParentsAffectedSon'].individuals.keys()) 56 | assert set(['proband', 'mother', 'father']) == set(family_parser.families['healthyParentsAffectedSon'].trios[0]) 57 | 58 | 59 | def main(): 60 | pass 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | 66 | -------------------------------------------------------------------------------- 
/tests/test_standard_trio_extra_daughter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | test_standard_trio extra daughter.py 5 | 6 | Test the family parser when everything is correct. 7 | 8 | #Standard trio 9 | #FamilyID SampleID Father Mother Sex Phenotype 10 | healthyParentsAffectedSon proband father mother 1 2 11 | healthyParentsAffectedSon mother 0 0 2 1 12 | healthyParentsAffectedSon father 0 0 1 1 13 | healthyParentsAffectedSon daughter father mother 2 1 14 | 15 | Should run through smoothely... 16 | 17 | Created by Måns Magnusson on 2014-05-08. 18 | Copyright (c) 2014 __MyCompanyName__. All rights reserved. 19 | """ 20 | 21 | import sys 22 | import os 23 | from tempfile import NamedTemporaryFile 24 | from ped_parser import FamilyParser 25 | 26 | 27 | class TestTrio(object): 28 | """Test class for testing how the individual class behave""" 29 | 30 | def setup_class(self): 31 | """Setup a standard trio.""" 32 | trio_lines = ['#Standard trio\n', 33 | '#FamilyID\tSampleID\tFather\tMother\tSex\tPhenotype\n', 34 | 'healthyParentsAffectedSon\tproband\tfather\tmother\t1\t2\n', 35 | 'healthyParentsAffectedSon\tmother\t0\t0\t2\t1\n', 36 | 'healthyParentsAffectedSon\tfather\t0\t0\t1\t1\n', 37 | 'healthyParentsAffectedSon\tdaughter\tfather\tmother\t2\t1\n', 38 | ] 39 | self.trio_file = NamedTemporaryFile(mode='w+t', delete=False, suffix='.vcf') 40 | self.trio_file.writelines(trio_lines) 41 | self.trio_file.seek(0) 42 | self.trio_file.close() 43 | 44 | def test_standard_trio_extra_daughter(self): 45 | """Test if the file is parsed in a correct way.""" 46 | family_parser = FamilyParser(open(self.trio_file.name, 'r')) 47 | trio_family = family_parser.families['healthyParentsAffectedSon'] 48 | 49 | assert family_parser.header == [ 50 | 'family_id', 51 | 'sample_id', 52 | 'father_id', 53 | 'mother_id', 54 | 'sex', 55 | 'phenotype' 56 | ] 57 | assert set(['proband', 'mother', 
'father', 'daughter']) == set(family_parser.families['healthyParentsAffectedSon'].individuals.keys()) 58 | assert set(['proband', 'mother', 'father']) in trio_family.trios 59 | assert set(['daughter', 'mother', 'father']) in trio_family.trios 60 | assert 'daughter' in trio_family.individuals['proband'].siblings 61 | 62 | 63 | 64 | def main(): 65 | pass 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | 71 | -------------------------------------------------------------------------------- /ped_parser/exceptions.py: -------------------------------------------------------------------------------- 1 | class WrongAffectionStatus(Exception): 2 | """Error for wrong affection status""" 3 | def __init__(self, cmms_id, valid_statuses, message = ""): 4 | """ 5 | Arguments: 6 | cmms_id (str): A string that describes the id 7 | valid_statuses (list): A list with the valid affections statuses 8 | """ 9 | super(WrongAffectionStatus, self).__init__() 10 | self.cmms_id = cmms_id 11 | self.valid_statuses = valid_statuses 12 | self.message = message 13 | 14 | def __str__(self): 15 | return self.message 16 | 17 | class WrongPhenotype(Exception): 18 | """Error for wrong phenotype""" 19 | def __init__(self, cmms_id, phenotype, affection_status, message=""): 20 | """ 21 | Arguments: 22 | cmms_id (str): A string that describes the id 23 | phenotype (int): A int with the ped affections status 24 | affection_status (str): A str that describes the cmms 25 | affections status 26 | """ 27 | super(WrongPhenotype, self).__init__() 28 | self.cmms_id = cmms_id 29 | self.phenotype = phenotype 30 | self.affection_status = affection_status 31 | self.message = message 32 | 33 | def __str__(self): 34 | return self.message 35 | 36 | class WrongGender(Exception): 37 | """Error for wrong gender""" 38 | def __init__(self, cmms_id, sex, sex_code, message=""): 39 | """ 40 | Arguments: 41 | cmms_id (str): A string that describes the id 42 | sex (int): A int with the ped sex code 43 | sex_code (str): A 
str that describes the cmms 44 | sex 45 | """ 46 | super(WrongGender, self).__init__() 47 | self.cmms_id = cmms_id 48 | self.sex = sex 49 | self.sex_code = sex_code 50 | self.message = message 51 | 52 | def __str__(self): 53 | return self.message 54 | 55 | class PedigreeError(Exception): 56 | """Error inconcistenies in ped file""" 57 | def __init__(self, family_id, individual_id, message=""): 58 | """ 59 | Arguments: 60 | family_id (str): A string that describes the family id 61 | individual_id (str): A str with the individual id 62 | """ 63 | super(PedigreeError, self).__init__() 64 | self.family_id = family_id 65 | self.individual_id = individual_id 66 | self.message = message 67 | 68 | def __str__(self): 69 | return self.message 70 | 71 | class WrongLineFormat(Exception): 72 | """Error inconcistenies in ped line""" 73 | def __init__(self, message="", ped_line=""): 74 | """ 75 | Arguments: 76 | message (str): A string with error message 77 | ped_line (str): The wrong formatted line 78 | """ 79 | super(WrongLineFormat, self).__init__() 80 | self.message = message 81 | self.ped_line = ped_line 82 | 83 | def __str__(self): 84 | return self.message 85 | 86 | 87 | -------------------------------------------------------------------------------- /tests/test_individual.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | test_indivdual.py 5 | 6 | Test the individual class. 7 | 8 | Created by Måns Magnusson on 2013-03-07. 9 | Copyright (c) 2013 __MyCompanyName__. All rights reserved. 
10 | """ 11 | 12 | import sys 13 | import os 14 | from ped_parser import individual 15 | 16 | 17 | class TestIndividual(object): 18 | """Test class for testing how the individual class behave""" 19 | 20 | def setup_class(self): 21 | """Setup a simple family with family id 1, sick daughter id 1, healthy father id 2, healthy mother id 3""" 22 | self.daughter = individual.Individual( 23 | ind='1', 24 | family='1', 25 | mother='3', 26 | father='2', 27 | sex=2, 28 | phenotype=2 29 | ) 30 | self.father = individual.Individual( 31 | ind='2', 32 | family='1', 33 | mother='0', 34 | father='0', 35 | sex=1, 36 | phenotype=1 37 | ) 38 | self.mother = individual.Individual( 39 | ind='3', 40 | family='1', 41 | mother='0', 42 | father='0', 43 | sex=2, 44 | phenotype=1 45 | ) 46 | self.random_individual = individual.Individual(ind='0') 47 | 48 | def test_daughter(self): 49 | """Test if the information about the daughter comes out correctly.""" 50 | assert self.daughter.affected 51 | assert self.daughter.has_parents 52 | assert self.daughter.sex == 2 53 | 54 | def test_father(self): 55 | """Test if the information about the father comes out correctly.""" 56 | assert not self.father.affected 57 | assert not self.father.has_parents 58 | assert self.father.sex == 1 59 | 60 | def test_mother(self): 61 | """Test if the information about the mother comes out correctly.""" 62 | assert not self.mother.affected 63 | assert not self.mother.has_parents 64 | assert self.mother.sex == 2 65 | 66 | def test_random_individual(self): 67 | """Test if the information about the father comes out correctly.""" 68 | assert not self.random_individual.affected 69 | assert not self.random_individual.has_parents 70 | assert self.random_individual.sex == 0 71 | 72 | 73 | 74 | def main(): 75 | pass 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | 81 | -------------------------------------------------------------------------------- /tests/test_family.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | test_family.py 5 | 6 | Tests for the family class 7 | 8 | Created by Måns Magnusson on 2013-03-13. 9 | Copyright (c) 2013 __MyCompanyName__. All rights reserved. 10 | """ 11 | 12 | import sys 13 | import os 14 | from ped_parser import family, individual 15 | 16 | 17 | 18 | class TestFamily(object): 19 | """Test class for testing how the individual class behave""" 20 | 21 | def setup_class(self): 22 | """Setup a simple family with family id 1, sick daughter id 1, healthy father id 2, healthy mother id 3""" 23 | # Create a family 24 | self.fam_id = '1' 25 | self.family = family.Family(family_id = self.fam_id) 26 | # Create a sick daughter: 27 | self.daughter = individual.Individual( 28 | ind = '1', 29 | family = self.fam_id, 30 | mother = '3', 31 | father = '2', 32 | sex = 2, 33 | phenotype = 2 34 | ) 35 | # Create a healthy son: 36 | self.son = individual.Individual( 37 | ind = '4', 38 | family = self.fam_id, 39 | mother = '3', 40 | father = '2', 41 | sex = 1, 42 | phenotype = 1 43 | ) 44 | # Create a healthy father 45 | self.father = individual.Individual( 46 | ind = '2', 47 | family = self.fam_id, 48 | mother = '0', 49 | father = '0', 50 | sex = 1, 51 | phenotype = 1 52 | ) 53 | # Create a healthy mother 54 | self.mother = individual.Individual( 55 | ind = '3', 56 | family = self.fam_id, 57 | mother = '0', 58 | father = '0', 59 | sex = 2, 60 | phenotype = 1 61 | ) 62 | self.family.add_individual(self.daughter) 63 | self.family.add_individual(self.son) 64 | self.family.add_individual(self.father) 65 | self.family.add_individual(self.mother) 66 | self.family.family_check() 67 | 68 | def test_individuals(self): 69 | """Test if all individuals are at place""" 70 | assert self.daughter.individual_id in self.family.individuals 71 | assert self.son.individual_id in self.family.individuals 72 | assert self.mother.individual_id in 
def print_version(ctx, param, value):
    """Click callback that prints the package version and exits.

    Args:
        ctx (click.Context): Current click context.
        param (click.Parameter): The parameter that triggered the callback.
        value (bool): True if ``--version`` was supplied.

    Returns:
        None
    """
    # Do nothing when the flag is absent or click is only doing
    # completion-style (resilient) parsing.
    if not value or ctx.resilient_parsing:
        return
    click.echo('ped_parser version: ' + __version__)
    ctx.exit()


### This is the main script ###

@click.command()
@click.argument('family_file',
                nargs=1,
                type=click.File('r'),
                # NOTE(review): the placeholder was missing in this copy of the
                # file (' or -'); restored to name the argument -- confirm.
                metavar='<family_file> or -'
)
@click.option('-t', '--family_type',
                type=click.Choice(['ped', 'alt', 'cmms', 'mip']),
                default='ped',
                help='If the analysis use one of the known setups, please specify which one. Default is ped'
)
@click.option('-o', '--outfile',
                type=click.File('a'),
                help='Specify the path to a file where results should be stored.'
)
@click.option('--cmms_check',
                is_flag=True,
                help='If the id is in cmms format.'
)
@click.option('--to_json',
                is_flag=True,
                help='Print the ped file in json format.'
)
@click.option('--to_madeline',
                is_flag=True,
                help='Print the ped file in madeline format.'
)
@click.option('--to_ped',
                is_flag=True,
                help='Print the ped file in ped format with headers.'
)
@click.option('--to_dict',
                is_flag=True,
                # Was a copy of the --to_ped help text.
                help='Print the ped file as a dictionary.'
)
@click.option('-v', '--verbose',
                is_flag=True,
                help='Increase output verbosity.'
)
@click.option('--version',
                is_flag=True,
                callback=print_version,
                expose_value=False,
                is_eager=True
)
@click.option('-l', '--logfile',
                type=click.Path(exists=False),
                help="Path to log file. If none logging is "\
                      "printed to stderr."
)
@click.option('--loglevel',
                type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR',
                                    'CRITICAL']),
                help="Set the level of log output."
)
def cli(family_file, family_type, outfile, to_json, to_madeline,
        cmms_check, to_ped, to_dict, verbose, logfile, loglevel):
    """Tool for parsing ped files.

    Convert the family file with one of --to_json, --to_madeline, --to_ped
    or --to_dict. If no conversion is chosen, a summary of the families is
    written to the log at INFO level (use --verbose or --loglevel INFO).
    For more information, please see github.com/moonso/ped_parser.
    """
    from pprint import pprint as pp

    # An explicit --loglevel wins; otherwise --verbose selects INFO.
    if not loglevel:
        loglevel = 'INFO' if verbose else 'WARNING'

    # Setup the logging environment
    init_log(logger, logfile, loglevel)

    my_parser = FamilyParser(family_info=family_file, family_type=family_type,
                             cmms_check=cmms_check)

    logger.info('Families found in file: {0}'.format(
        ','.join(list(my_parser.families.keys()))
    ))

    if to_json:
        if outfile:
            outfile.write(my_parser.to_json())
        else:
            print(my_parser.to_json())

    elif to_madeline:
        for line in my_parser.to_madeline():
            if outfile:
                outfile.write(line + '\n')
            else:
                print(line)

    elif to_ped:
        for line in my_parser.to_ped():
            if outfile:
                outfile.write(line + '\n')
            else:
                print(line)

    elif to_dict:
        # pprint writes to sys.stdout when stream is None, so this honours
        # --outfile like the other conversions and keeps the old behaviour
        # when no outfile is given.
        pp(my_parser.to_dict(), stream=outfile)

    else:
        # If no specific output is chosen, log a summary about the families
        for family_id in my_parser.families:
            family = my_parser.families[family_id]
            logger.info('Fam: {0}'.format(family_id))
            if family_type in ['cmms', 'mip']:
                logger.info('Expected Inheritance Models: {0}'.format(
                    family.models_of_inheritance
                ))
            logger.info('Individuals: ')
            for individual_id in family.individuals:
                logger.info(family.individuals[individual_id])
            logger.info('Affected individuals: {0} \n'.format(
                ','.join(family.affected_individuals)
            ))


if __name__ == '__main__':
    cli()
Status](https://travis-ci.org/moonso/ped_parser.svg)](https://travis-ci.org/moonso/ped_parser) 4 | 5 | A small tool for parsing files in the [pedigree (.ped) format](http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped). 6 | The parser will create a family object for each family found in the pedigree file and an individual object for each individual found. 7 | The tool can be used to access information from ped files or convert the data to [madeline2](http://eyegene.ophthy.med.umich.edu/madeline/index.php) format for drawing pedigree trees. 8 | It is also possible to create family objects and individual objects and print them in ped and madeline formats. 9 | 10 | ## Installation ## 11 | 12 | pip install ped_parser 13 | 14 | 15 | ## General ## 16 | 17 | Parse a file with family info. This can be a .ped file, a .fam file, a .txt (alternative ped style) 18 | file or another ped-based alternative. 19 | 20 | .ped and .fam files always have 6 columns, these are 21 | 22 | ``` 23 | Family_ID - '.' or '0' for unknown 24 | Individual_ID - '.' or '0' for unknown 25 | Paternal_ID - '.' or '0' for unknown 26 | Maternal_ID - '.' or '0' for unknown 27 | Sex - '1'=male; '2'=female; ['other', '0', '.']=unknown 28 | Phenotype - '1'=unaffected, '2'=affected, ['-9', '0', '.']= missing 29 | ``` 30 | 31 | The other types must specify the columns in the header. 32 | The header always starts with '#'. 33 | These files always start with the ped columns described above. 34 | 35 | The following column names will be treated with care, which means that they will be used when outputting a madeline type of file or be made accessible as variables in the parser. 36 | 37 | ```InheritanceModel``` - a ';'-separated list of expected inheritance models.
38 | 39 | Choices are 40 | 41 | ``` 42 | ['AR','AR_hom','AR_denovo','AR_hom_denovo','AR_hom_dn','AR_dn','AR_compound','AR_comp','AD','AD_dn','AD_denovo','X','X_dn','X_denovo','NA','Na','na','.'] 43 | ``` 44 | 45 | A proband is the first affected member of a pedigree coming to medical attention. They are annotated with: 46 | ```Proband - 'Yes', 'No', 'Unknown' or '.'``` 47 | A consultand is an individual who has sought genetic counseling or testing. 48 | ```Consultand - 'Yes', 'No', 'Unknown' or '.'``` 49 | ```Alive - 'Yes', 'No', 'Unknown' or '.'``` 50 | 51 | ## Usage ## 52 | 53 | ped_parser can be used as a standalone command line tool to convert ped files and ped-like files to json or madeline2 format, 54 | or just to get information about the content of a pedigree file. 55 | 56 | When installed, try: 57 | 58 | ped_parser --help 59 | 60 | for more information. 61 | 62 | When parsing the .ped file the following will be checked: 63 | 64 | - That the family bindings are consistent and that all mandatory values exist and have correct values. Exceptions are raised if the number of columns differs between individuals 65 | - That mother and father have correct gender, if not an exception is raised 66 | - If two individuals are siblings 67 | - Identify all trios (or duos) found in the pedigree 68 | 69 | 70 | ## Alternative .ped files ## 71 | 72 | ped\_parser also supports modified .ped files (some users want to store extra family and/or individual information in the pedigree file). In this case ped\_parser will look at the first 6 columns and work as described above. 73 | In this case use: 74 | 75 | ped_parser infile.ped --family_type alt 76 | 77 | ### Madeline2 conversion ### 78 | 79 | 80 | [Madeline2](http://eyegene.ophthy.med.umich.edu/madeline/index.php) is an excellent tool to draw pedigrees but it uses its own input formats.
ped_parser can now produce madeline2 input files from ped files by using 81 | 82 | ped_parser input.ped --to_madeline [-o output.txt] 83 | 84 | The following columns will be added to the madeline file: 85 | 86 | 'FamilyID', 'IndividualID', 'Gender', 'Father', 'Mother', 'Affected', 'Proband', 'Consultand', 'Alive' 87 | 88 | Since only the first six of these columns are the standard ped format columns, ped parser allows for alternative pedigree files with the following rules: 89 | 90 | 91 | ### json conversion ### 92 | 93 | 94 | 95 | ped_parser input.ped --to_json [-o output.txt] 96 | 97 | The output is a list of lists that represent families; each family holds 98 | dictionaries that represent individuals, like 99 | 100 | ```json 101 | [ 102 | [ 103 | { 104 | 'family_id': family_id, 105 | 'id':individual_id, 106 | 'sex':gender_code, 107 | 'phenotype': phenotype_code, 108 | 'mother': mother_id, 109 | 'father': father_id 110 | }, 111 | { 112 | ... 113 | } 114 | ], 115 | [ 116 | 117 | ] 118 | ] 119 | ``` 120 | 121 | ### Create ped like objects ### 122 | 123 | Ped-like objects can be created from within a python program and converted to ped, json or madeline output like this 124 | 125 | ```python 126 | 127 | >from ped_parser import Individual, Family 128 | 129 | >outfile = open('my_family.ped','a') 130 | >my_individuals = [] 131 | >my_individuals.append(Individual( 132 | 'proband', 133 | family='1', 134 | mother='mother', 135 | father='father', 136 | sex='1', 137 | phenotype='2' 138 | ) 139 | ) 140 | >my_individuals.append(Individual( 141 | 'mother', 142 | family='1', 143 | mother='0', 144 | father='0', 145 | sex='2', 146 | phenotype='1' 147 | ) 148 | ) 149 | >my_individuals.append(Individual( 150 | 'father', 151 | family='1', 152 | mother='0', 153 | father='0', 154 | sex='1', 155 | phenotype='1' 156 | ) 157 | ) 158 | >my_family = Family(family_id='1') 159 | >for individual in my_individuals: 160 | my_family.add_individual(individual) 161 | >my_family.to_ped(outfile)
class Individual(object):
    """Hold the pedigree information of one individual.

    Attributes:
        individual_id (str): Id of the individual.
        family (str): Id of the family the individual belongs to.
        mother (str): Id of the mother, '0' if missing.
        father (str): Id of the father, '0' if missing.
        sex (int): 1=male, 2=female, anything else is treated as unknown.
        phenotype (int): 1=unaffected, 2=affected, anything else is missing.
        affected (bool): True if phenotype is 2.
        healthy (bool): True if phenotype is 1.
        has_parents (bool): True if at least one parent id is given.
        has_both_parents (bool): True if both parent ids are given.
        proband, consultand, alive (str): Madeline annotations.
        extra_info (dict): Free-form extra columns for this individual.
        siblings (set): Ids of siblings, filled in by the family check.
        grandparents (dict): Grandparent ids as keys (values are '').
        first_cousins, second_cousins (set): Reserved, not filled in here.
    """

    def __init__(self, ind, family='0', mother='0', father='0', sex='0',
                 phenotype='0', genetic_models=None, proband='.',
                 consultand='.', alive='.'):
        """Create an individual.

        Arguments:
            ind (str): Id of the individual.
            family (str): Id of the family.
            mother (str): Id of the mother, '0' if missing.
            father (str): Id of the father, '0' if missing.
            sex (str or int): Must be convertible to int.
            phenotype (str or int): Must be convertible to int.
            genetic_models: Accepted for interface compatibility.
                NOTE: the value is currently not stored on the object.
            proband (str): Madeline proband annotation.
            consultand (str): Madeline consultand annotation.
            alive (str): Madeline alive annotation.

        Raises:
            SyntaxError: If sex or phenotype can not be converted to int.
        """
        #TODO write test to throw exceptions if malformed input.
        self.logger = logging.getLogger(__name__)

        self.logger.debug("Creating individual")
        self.individual_id = ind #Individual Id STRING
        self.logger.debug("Individual id: {0}".format(self.individual_id))

        self.family = family #Family Id STRING
        self.logger.debug("Family id: {0}".format(self.family))

        self.mother = mother #Mother Id STRING
        self.logger.debug("Mother id: {0}".format(self.mother))

        self.father = father # Father Id STRING
        self.logger.debug("Father id: {0}".format(self.father))

        self.affected = False
        self.healthy = False
        self.extra_info = {}

        # For madeline:
        self.proband = proband
        self.logger.debug("Proband: {0}".format(self.proband))

        self.consultand = consultand
        self.logger.debug("Consultand: {0}".format(self.consultand))

        self.alive = alive
        self.logger.debug("Alive: {0}".format(self.alive))

        try:
            self.sex = int(sex) # Sex Integer
            self.logger.debug("Sex: {0}".format(self.sex))

            self.phenotype = int(phenotype) # Phenotype INTEGER
            self.logger.debug("Phenotype: {0}".format(self.phenotype))

        except ValueError:
            # SyntaxError is kept because callers may already catch it.
            raise SyntaxError('Sex and phenotype have to be integers.')

        self.has_parents = False
        self.has_both_parents = False

        if self.mother != '0':
            self.has_parents = True
            if self.father != '0':
                self.has_both_parents = True
        elif self.father != '0':
            self.has_parents = True

        self.logger.debug("Individual has parents: {0}".format(self.has_parents))
        # These features will be added
        #TODO make use of family relations:
        self.siblings = set()
        self.grandparents = dict()
        self.first_cousins = set()
        self.second_cousins = set()

        if self.phenotype == 2:
            self.affected = True
        elif self.phenotype == 1:
            self.healthy = True

    def check_grandparents(self, mother = None, father = None):
        """
        Check if there are any grand parents.

        Set the grandparents id:s. Every known parent ('0' means missing)
        of the given mother and father is registered as a key in
        self.grandparents.

        Arguments:
            mother (Individual): An Individual object that represents the mother
            father (Individual): An Individual object that represents the father

        """
        for parent in (mother, father):
            if not parent:
                continue
            # Both sides are checked independently. The previous if/elif
            # dropped the grandfather whenever the grandmother was known.
            for grandparent_id in (parent.mother, parent.father):
                if grandparent_id != '0':
                    self.grandparents[grandparent_id] = ''
        return

    def to_json(self):
        """
        Return the individual info in a dictionary for json.

        Returns:
            dict : family_id, id, sex, phenotype, mother, father and
                   extra_info. Sex and phenotype are given as strings.
        """
        self.logger.debug("Returning json info")
        individual_info = {
            'family_id': self.family,
            'id':self.individual_id,
            'sex':str(self.sex),
            'phenotype': str(self.phenotype),
            'mother': self.mother,
            'father': self.father,
            'extra_info': self.extra_info
        }
        return individual_info

    def to_madeline(self):
        """
        Return the individual info in a madeline formated string

        The string is tab separated with the columns family, individual,
        gender, father, mother, phenotype, proband, consultand and alive.
        Unknown values are written as '.'.
        """
        self.logger.debug("Returning madeline info")
        #Convert sex to madeleine type
        madeline_gender = {1: 'M', 2: 'F'}.get(self.sex, '.')
        #Convert parents to madeleine type
        madeline_father = '.' if self.father == '0' else self.father
        madeline_mother = '.' if self.mother == '0' else self.mother
        #Convert phenotype to madeleine type
        madeline_phenotype = {1: 'U', 2: 'A'}.get(self.phenotype, '.')

        return "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}".format(
            self.family, self.individual_id, madeline_gender,
            madeline_father, madeline_mother, madeline_phenotype,
            self.proband, self.consultand, self.alive
        )

    def __repr__(self):
        return "Individual(individual_id={0}, family={1}, mother={2}, " \
                "father={3}, sex={4}, phenotype={5})".format(
                    self.individual_id, self.family, self.mother, self.father,
                    self.sex, self.phenotype
                )

    def __str__(self):
        """Return a space separated summary, with siblings if any are set."""
        ind_info = ['ind_id:', self.individual_id,
                    'family:', self.family,
                    'mother:', self.mother,
                    'father:', self.father,
                    'sex:', str(self.sex),
                    'phenotype:', str(self.phenotype),
                    ]
        if len(self.siblings) > 0:
            ind_info.append('siblings:')
            ind_info.append(','.join(self.siblings))

        return ' '.join(ind_info)
class Family(object):
    """Hold a family and its individuals and check their relations.

    Attributes:
        family_id (str): Id of the family.
        individuals (dict): Individual objects keyed on individual id.
        models_of_inheritance (set): Models of inheritance to prioritize.
        trios (list): Sets with the ids of a child and both its parents.
        duos (list): Sets with the ids of a child and its single parent.
        no_relations (bool): False once any individual with parents is found.
        affected_individuals (set): Ids of the affected individuals.
    """
    def __init__(self, family_id, individuals=None, models_of_inheritance=None,
                 logger=None, logfile=None, loglevel=None):
        """Create a family.

        Arguments:
            family_id (str): Id of the family.
            individuals (dict, optional): Individual objects keyed on id.
                The dict is used as is (not copied). Defaults to a new
                empty dict.
            models_of_inheritance (set, optional): Defaults to a new empty
                set.
            logger, logfile, loglevel: Unused, kept for compatibility.
        """
        super(Family, self).__init__()
        self.logger = logging.getLogger(__name__)
        # Each family needs to have a family id
        self.family_id = family_id
        self.logger.debug("Initiating family with id:{0}".format(self.family_id))

        # This is a dict with individual objects.
        # A fresh container is created per instance; mutable default
        # arguments would be shared between every Family object.
        self.individuals = {} if individuals is None else individuals
        self.logger.debug("Adding individuals:{0}".format(
            ','.join([ind for ind in self.individuals])
        ))

        # Set of models of inheritance that should be prioritized.
        if models_of_inheritance is None:
            models_of_inheritance = set()
        self.models_of_inheritance = models_of_inheritance
        self.logger.debug("Adding models of inheritance:{0}".format(
            ','.join(self.models_of_inheritance)
        ))

        #Trios are a list of sets with trios.
        self.trios = []
        #Duos are a list of sets with duos.
        self.duos = []
        # Bool if there are any relations in the family
        self.no_relations = True
        # Set of affected individual id:s
        self.affected_individuals = set()

    def family_check(self):
        """
        Check if the family members break the structure of the family.

        eg. nonexistent parent, wrong sex on parent etc.

        Also collects affected individuals, trios, duos and siblings.
        Trios and duos are appended on every call.

        Raises:
            PedigreeError: If a parent is missing or has the wrong sex.
        """
        #TODO Make some tests for these
        self.logger.info("Checking family relations for {0}".format(
            self.family_id)
        )
        for individual_id in self.individuals:

            self.logger.debug("Checking individual {0}".format(individual_id))
            individual = self.individuals[individual_id]

            self.logger.debug("Checking if individual {0} is affected".format(
                individual_id))

            if individual.affected:
                self.logger.debug("Found affected individual {0}".format(
                    individual_id)
                )
                self.affected_individuals.add(individual_id)

            father = individual.father
            mother = individual.mother

            if individual.has_parents:
                self.logger.debug("Individual {0} has parents".format(
                    individual_id))
                self.no_relations = False
                try:
                    self.check_parent(father, father=True)
                    self.check_parent(mother, father=False)
                except PedigreeError as e:
                    # Fall back to str(e) in case the exception carries no
                    # 'message' attribute.
                    self.logger.error(getattr(e, 'message', str(e)))
                    raise

                # Check if there is a trio
                if individual.has_both_parents:
                    self.trios.append(set([individual_id, father, mother]))
                elif father != '0':
                    self.duos.append(set([individual_id, father]))
                else:
                    self.duos.append(set([individual_id, mother]))

                ##TODO self.check_grandparents(individual)

            # Annotate siblings:
            for individual_2_id in self.individuals:
                if individual_id != individual_2_id:
                    if self.check_siblings(individual_id, individual_2_id):
                        individual.siblings.add(individual_2_id)
                    ##TODO elif self.check_cousins(individual_id, individual_2_id):
                    #     individual.cousins.add(individual_2_id)

    def check_parent(self, parent_id, father = False):
        """
        Check if the parent info is correct.

        A parent id of '0' means missing parent and is not checked.

        Arguments:
            parent_id (str): An id that represents a parent
            father (bool): True if the parent should be the father

        Raises:
            PedigreeError: If the parent id is not present in the family or
                           the gender of the parent is wrong.
        """
        self.logger.debug("Checking parent {0}".format(parent_id))
        if parent_id != '0':
            if parent_id not in self.individuals:
                raise PedigreeError(self.family_id, parent_id,
                                    'Parent is not in family.')
            if father:
                if self.individuals[parent_id].sex != 1:
                    raise PedigreeError(self.family_id, parent_id,
                                        'Father is not specified as male.')
            else:
                if self.individuals[parent_id].sex != 2:
                    raise PedigreeError(self.family_id, parent_id,
                                        'Mother is not specified as female.')
        return

    def check_siblings(self, individual_1_id, individual_2_id):
        """
        Check if two family members are siblings.

        Two individuals are siblings if they share a known father or a
        known mother.

        Arguments:
            individual_1_id (str): The id of an individual
            individual_2_id (str): The id of an individual

        Returns:
            bool : True if the individuals are siblings
                   False if they are not siblings
        """

        self.logger.debug("Checking if {0} and {1} are siblings".format(
            individual_1_id, individual_2_id
        ))
        ind_1 = self.individuals[individual_1_id]
        ind_2 = self.individuals[individual_2_id]
        return ((ind_1.father != '0' and ind_1.father == ind_2.father) or
                (ind_1.mother != '0' and ind_1.mother == ind_2.mother))

    def check_cousins(self, individual_1_id, individual_2_id):
        """
        Check if two family members are cousins.

        If two individuals share any grandparents they are cousins.

        NOTE: Not implemented yet, currently always returns None.

        Arguments:
            individual_1_id (str): The id of an individual
            individual_2_id (str): The id of an individual

        """
        self.logger.debug("Checking if {0} and {1} are cousins".format(
            individual_1_id, individual_2_id
        ))

        #TODO check if any of the parents are siblings
        pass

    def add_individual(self, individual_object):
        """
        Add an individual to the family.

        Arguments:
            individual_object (Individual)

        Raises:
            PedigreeError: If the family id of the individual differs from
                           the id of this family.
        """
        ind_id = individual_object.individual_id
        self.logger.info("Adding individual {0}".format(ind_id))
        family_id = individual_object.family
        if family_id != self.family_id:
            # self.family_id, not the nonexistent self.family
            raise PedigreeError(self.family_id, individual_object.individual_id,
                "Family id of individual is not the same as family id for "\
                                    "Family object!")
        else:
            self.individuals[ind_id] = individual_object
            self.logger.debug("Individual {0} added to family {1}".format(
                ind_id, family_id
            ))
        return

    def get_phenotype(self, individual_id):
        """
        Return the phenotype of an individual

        If individual does not exist return 0

        Arguments:
            individual_id (str): Represents the individual id

        Returns:
            int : Integer that represents the phenotype
        """
        phenotype = 0 # This is if unknown phenotype
        if individual_id in self.individuals:
            phenotype = self.individuals[individual_id].phenotype

        return phenotype

    def get_trios(self):
        """
        Return the trios found in family
        """
        return self.trios

    def to_json(self):
        """
        Return the family in json format.

        The family will be represented as a list with dictionarys that
        holds information for the individuals.

        Returns:
            list : A list with dictionaries
        """

        return [self.individuals[ind].to_json() for ind in self.individuals]

    def to_ped(self, outfile=None):
        """
        Print the individuals of the family in ped format

        The header will be the original ped header plus the known extra
        headers (InheritanceModel, Proband, Consultand, Alive) that are
        found in extra info of the individuals.

        Arguments:
            outfile (file, optional): Written to if given, otherwise the
                                      lines are printed to stdout.
        """

        ped_header = [
            '#FamilyID',
            'IndividualID',
            'PaternalID',
            'MaternalID',
            'Sex',
            'Phenotype',
        ]

        extra_headers = [
            'InheritanceModel',
            'Proband',
            'Consultand',
            'Alive'
        ]

        for individual_id in self.individuals:
            individual = self.individuals[individual_id]
            for info in individual.extra_info:
                if info in extra_headers:
                    if info not in ped_header:
                        ped_header.append(info)

        self.logger.debug("Ped headers found: {0}".format(
            ', '.join(ped_header)
        ))

        if outfile:
            outfile.write('\t'.join(ped_header)+'\n')
        else:
            print('\t'.join(ped_header))

        for individual in self.to_json():
            ped_info = []
            ped_info.append(individual['family_id'])
            ped_info.append(individual['id'])
            ped_info.append(individual['father'])
            ped_info.append(individual['mother'])
            ped_info.append(individual['sex'])
            ped_info.append(individual['phenotype'])

            # Individuals lacking an extra column get '.' for it
            if len(ped_header) > 6:
                for header in ped_header[6:]:
                    ped_info.append(individual['extra_info'].get(header, '.'))

            if outfile:
                outfile.write('\t'.join(ped_info)+'\n')
            else:
                print('\t'.join(ped_info))

    def __repr__(self):
        return "Family(family_id={0}, individuals={1}, " \
                "models_of_inheritance={2})".format(
                    self.family_id, self.individuals.keys(),
                    self.models_of_inheritance
                )

    def __str__(self):
        """Print the family members of this family"""
        family = list(self.individuals.keys())
        return "\t".join(family)
24 | 25 | The following column names will be treated with care, which means that they 26 | will be used when outputting a madeline type of file or makes accesable 27 | variables in the parser: 28 | 29 | 'InheritanceModel' - a ';'-separated list of expected inheritance models. 30 | Choices are: 31 | ['AR','AR_hom','AR_denovo','AR_hom_denovo','AR_hom_dn','AR_dn', 32 | 'AR_compound','AR_comp','AD','AD_dn','AD_denovo','X','X_dn', 33 | 'X_denovo','NA','Na','na','.'] 34 | 35 | 'Proband' - 'Yes', 'No', 'Unknown' or '.'. A proband is the first affected 36 | member of a pedigree coming to medical attention. 37 | 'Consultand' - 'Yes', 'No', 'Unknown' or '.'. A consultand is an individual 38 | who has sought genetic counseling or testing. 39 | 'Alive' - 'Yes', 'No', 'Unknown' or '.' 40 | 41 | Create a family object and its family members from different types of input file 42 | Created by Måns Magnusson on 2013-01-17. 43 | Copyright (c) 2013 __MoonsoInc__. All rights reserved. 44 | """ 45 | 46 | from __future__ import print_function 47 | 48 | import json 49 | import logging 50 | import click 51 | 52 | from string import whitespace 53 | from ped_parser import (Individual, Family) 54 | from ped_parser.log import init_log 55 | from ped_parser.exceptions import (WrongAffectionStatus, WrongPhenotype, 56 | WrongGender, PedigreeError, WrongLineFormat) 57 | 58 | 59 | ############### Names of genetic models ############### 60 | # These are stored as global variables and can be altered is the user 61 | # prefer other model names or want to add names 62 | 63 | AR_HOM_NAMES = ['AR', 'AR_hom'] 64 | AR_HOM_DN_NAMES = ['AR_denovo', 'AR_hom_denovo', 'AR_hom_dn', 'AR_dn'] 65 | COMPOUND_NAMES = ['AR_compound', 'AR_comp'] 66 | AD_NAMES = ['AD', 'AD_dn', 'AD_denovo'] 67 | X_NAMES = ['X', 'X_dn', 'X_denovo'] 68 | NA_NAMES = ['NA', 'Na', 'na', '.'] 69 | 70 | 71 | 72 | class FamilyParser(object): 73 | """ 74 | Parses a iterator with family info and creates a family object with 75 | individuals. 
76 | """ 77 | def __init__(self, family_info, family_type = 'ped', cmms_check=False): 78 | """ 79 | 80 | Arguments: 81 | family_info (iterator) 82 | family_type (str): Any of [ped, alt, cmms, fam, mip] 83 | cmms_check (bool, optional): Perform CMMS validations? 84 | 85 | """ 86 | super(FamilyParser, self).__init__() 87 | 88 | if __name__ == "__main__": 89 | self.logger = logging.getLogger("ped_parser.FamilyParser") 90 | else: 91 | self.logger = logging.getLogger(__name__) 92 | 93 | self.logger.info("Initializing family parser") 94 | 95 | self.cmms_check = cmms_check 96 | self.family_type = family_type 97 | self.logger.info("Family type:{0}".format(family_type)) 98 | self.families = {} 99 | self.individuals = {} 100 | self.legal_ar_hom_names = AR_HOM_NAMES 101 | self.logger.debug("Legal AR hom names:{0}".format(AR_HOM_NAMES)) 102 | self.legal_ar_hom_dn_names = AR_HOM_DN_NAMES 103 | self.logger.debug("Legal AR dn names:{0}".format(AR_HOM_DN_NAMES)) 104 | self.legal_compound_names = COMPOUND_NAMES 105 | self.logger.debug("Legal AR compound names:{0}".format(COMPOUND_NAMES)) 106 | self.legal_ad_names = AD_NAMES 107 | self.logger.debug("Legal AD compound names:{0}".format(AD_NAMES)) 108 | self.legal_x_names = X_NAMES 109 | self.logger.debug("Legal X hom names:{0}".format(X_NAMES)) 110 | self.legal_na_names = NA_NAMES 111 | self.logger.debug("Legal NA names:{0}".format(NA_NAMES)) 112 | 113 | self.header = ['family_id', 'sample_id', 'father_id', 114 | 'mother_id', 'sex', 'phenotype'] 115 | 116 | if self.family_type in ['ped', 'fam']: 117 | self.ped_parser(family_info) 118 | elif self.family_type == 'alt': 119 | self.alternative_parser(family_info) 120 | elif self.family_type in ['cmms', 'mip']: 121 | self.alternative_parser(family_info) 122 | # elif family_type == 'broad': 123 | # self.broad_parser(individual_line, line_count) 124 | for fam in self.families: 125 | self.families[fam].family_check() 126 | 127 | def get_individual(self, family_id, sample_id, father_id, 
mother_id, sex, phenotype, 128 | genetic_models = None, proband='.', consultand='.', alive='.'): 129 | """ 130 | Return a individual object based on the indata. 131 | 132 | Arguments: 133 | family_id (str): The id for this family 134 | sample_id (str): The id for this sample 135 | father_id (str): The id for this samples father 136 | mother_id (str): The id for this samples mother 137 | sex (str): The id for the sex of this sample 138 | phenotype (str): The id for the phenotype of this sample 139 | genetic_models (str): A ';'-separated string with the expected 140 | models of inheritance for this sample 141 | proband (str): 'Yes', 'No' or '.' 142 | consultand (str): 'Yes', 'No' or '.' if the individual is sequenced 143 | alive (str): 'Yes', 'No' or '.' 144 | 145 | returns: 146 | individual (Individual): A Individual object with the information 147 | """ 148 | if sex not in ['1', '2']: 149 | sex = '0' 150 | if phenotype not in ['1', '2']: 151 | phenotype = '0' 152 | if mother_id == '.': 153 | mother_id = '0' 154 | if father_id == '.': 155 | father_id = '0' 156 | if genetic_models: 157 | genetic_models = genetic_models.split(';') 158 | 159 | if proband == 'Yes': 160 | proband = 'Y' 161 | elif proband == 'No': 162 | proband = 'N' 163 | else: 164 | proband = '.' 165 | 166 | if consultand == 'Yes': 167 | consultand = 'Y' 168 | elif consultand == 'No': 169 | consultand = 'N' 170 | else: 171 | consultand = '.' 172 | 173 | if alive == 'Yes': 174 | alive = 'Y' 175 | elif alive == 'No': 176 | alive = 'N' 177 | else: 178 | alive = '.' 179 | 180 | individual = Individual( 181 | sample_id, 182 | family_id, 183 | mother_id, 184 | father_id, 185 | sex, 186 | phenotype, 187 | genetic_models, 188 | proband, 189 | consultand, 190 | alive 191 | ) 192 | 193 | return individual 194 | 195 | def check_line_length(self, splitted_line, expected_length): 196 | """ 197 | Check if the line is correctly formated. Throw a SyntaxError if it is not. 
198 | """ 199 | if len(splitted_line) != expected_length: 200 | raise WrongLineFormat( 201 | message='WRONG FORMATED PED LINE!', 202 | ped_line = '\t'.join(splitted_line)) 203 | return 204 | 205 | def ped_parser(self, family_info): 206 | """ 207 | Parse .ped formatted family info. 208 | 209 | Add all family info to the parser object 210 | 211 | Arguments: 212 | family_info (iterator): An iterator with family info 213 | 214 | """ 215 | 216 | for line in family_info: 217 | # Check if commented line or empty line: 218 | if not line.startswith('#') and not all(c in whitespace for c in line.rstrip()): 219 | splitted_line = line.rstrip().split('\t') 220 | if len(splitted_line) != 6: 221 | # Try to split the line on another symbol: 222 | splitted_line = line.rstrip().split() 223 | try: 224 | self.check_line_length(splitted_line, 6) 225 | except WrongLineFormat as e: 226 | self.logger.error(e) 227 | self.logger.info("Ped line: {0}".format(e.ped_line)) 228 | raise e 229 | 230 | sample_dict = dict(zip(self.header, splitted_line)) 231 | family_id = sample_dict['family_id'] 232 | 233 | if sample_dict['family_id'] not in self.families: 234 | self.families[family_id] = Family(family_id, {}) 235 | 236 | ind_object = self.get_individual(**sample_dict) 237 | self.individuals[ind_object.individual_id] = ind_object 238 | self.families[ind_object.family].add_individual(ind_object) 239 | 240 | 241 | def alternative_parser(self, family_file): 242 | """ 243 | Parse alternative formatted family info 244 | 245 | This parses a information with more than six columns. 246 | For alternative information header comlumn must exist and each row 247 | must have the same amount of columns as the header. 248 | First six columns must be the same as in the ped format. 
249 | 250 | Arguments: 251 | family_info (iterator): An iterator with family info 252 | """ 253 | 254 | alternative_header = None 255 | 256 | for line in family_file: 257 | if line.startswith('#'): 258 | alternative_header = line[1:].rstrip().split('\t') 259 | self.logger.info("Alternative header found: {0}".format(line)) 260 | elif line.strip(): 261 | if not alternative_header: 262 | raise WrongLineFormat(message="Alternative ped files must have "\ 263 | "headers! Please add a header line.") 264 | 265 | splitted_line = line.rstrip().split('\t') 266 | if len(splitted_line) < 6: 267 | # Try to split the line on another symbol: 268 | splitted_line = line.rstrip().split() 269 | try: 270 | self.check_line_length(splitted_line, len(alternative_header)) 271 | except SyntaxError as e: 272 | self.logger.error('Number of entrys differ from header.') 273 | self.logger.error("Header:\n{0}".format('\t'.join(alternative_header))) 274 | self.logger.error("Ped Line:\n{0}".format('\t'.join(splitted_line))) 275 | self.logger.error("Length of Header: {0}. 
Length of "\ 276 | "Ped line: {1}".format( 277 | len(alternative_header), 278 | len(splitted_line)) 279 | ) 280 | raise e 281 | 282 | if len(line) > 1: 283 | 284 | sample_dict = dict(zip(self.header, splitted_line[:6])) 285 | 286 | family_id = sample_dict['family_id'] 287 | 288 | all_info = dict(zip(alternative_header, splitted_line)) 289 | 290 | if sample_dict['family_id'] not in self.families: 291 | self.families[family_id] = Family(family_id, {}) 292 | 293 | sample_dict['genetic_models'] = all_info.get('InheritanceModel', None) 294 | # Try other header naming: 295 | if not sample_dict['genetic_models']: 296 | sample_dict['genetic_models'] = all_info.get('Inheritance_model', None) 297 | 298 | sample_dict['proband'] = all_info.get('Proband', '.') 299 | sample_dict['consultand'] = all_info.get('Consultand', '.') 300 | sample_dict['alive'] = all_info.get('Alive', '.') 301 | 302 | ind_object = self.get_individual(**sample_dict) 303 | 304 | self.individuals[ind_object.individual_id] = ind_object 305 | self.families[ind_object.family].add_individual(ind_object) 306 | 307 | if sample_dict['genetic_models']: 308 | for model in self.get_models(sample_dict['genetic_models']): 309 | self.families[ind_object.family].models_of_inheritance.add(model) 310 | 311 | # If requested, we try is it is an id in the CMMS format: 312 | sample_id_parts = ind_object.individual_id.split('-') 313 | if self.cmms_check and (len(sample_id_parts) == 3): 314 | # If the id follow the CMMS convention we can 315 | # do a sanity check 316 | if self.check_cmms_id(ind_object.individual_id): 317 | self.logger.debug("Id follows CMMS convention: {0}".format( 318 | ind_object.individual_id 319 | )) 320 | self.logger.debug("Checking CMMS id affections status") 321 | try: 322 | self.check_cmms_affection_status(ind_object) 323 | except WrongAffectionStatus as e: 324 | self.logger.error("Wrong affection status for"\ 325 | " {0}. 
Affection status can be in"\ 326 | " {1}".format(e.cmms_id, e.valid_statuses)) 327 | raise e 328 | except WrongPhenotype as e: 329 | self.logger.error("Affection status for {0} "\ 330 | "({1}) disagrees with phenotype ({2})".format( 331 | e.cmms_id, e.phenotype, e.affection_status 332 | )) 333 | raise e 334 | 335 | try: 336 | self.check_cmms_gender(ind_object) 337 | except WrongGender as e: 338 | self.logger.error("Gender code for id {0}"\ 339 | "({1}) disagrees with sex:{2}".format( 340 | e.cmms_id, e.sex_code, e.sex 341 | )) 342 | raise e 343 | 344 | for i in range(6, len(splitted_line)): 345 | ind_object.extra_info[alternative_header[i]] = splitted_line[i] 346 | 347 | def check_cmms_id(self, ind_id): 348 | """ 349 | Take the ID and check if it is following the cmms standard. 350 | The standard is year:id-generation-indcode:affectionstatus. 351 | Year is two digits, id three digits, generation in roman letters 352 | indcode are digits and affection status are in ['A', 'U', 'X']. 353 | Example 11001-II-1A. 354 | 355 | Input: 356 | ind_obj : A individual object 357 | 358 | Yields: 359 | bool : True if it is correct 360 | """ 361 | ind_id = ind_id.split('-') 362 | # This in A (=affected), U (=unaffected) or X (=unknown) 363 | family_id = ind_id[0] 364 | try: 365 | int(family_id) 366 | except ValueError: 367 | return False 368 | affection_status = ind_id[-1][-1] 369 | try: 370 | type(affection_status.isalpha()) 371 | except ValueError: 372 | return False 373 | 374 | return True 375 | 376 | def check_cmms_affection_status(self, ind_object): 377 | """ 378 | Check if the affection status is correct. 
379 | 380 | Args: 381 | ind_object : An Individuals object 382 | 383 | Yields: 384 | bool : True if affection status is correct 385 | False otherwise 386 | """ 387 | valid_affection_statuses = ['A', 'U', 'X'] 388 | ind_id = ind_object.individual_id.split('-') 389 | phenotype = ind_object.phenotype 390 | affection_status = ind_id[-1][-1] 391 | 392 | if affection_status not in valid_affection_statuses: 393 | raise WrongAffectionStatus(ind_object.individual_id, 394 | valid_affection_statuses) 395 | 396 | if (affection_status == 'A' and phenotype != 2 or 397 | affection_status == 'U' and phenotype != 1): 398 | raise WrongPhenotype(ind_object.individual_id, phenotype, 399 | affection_status) 400 | 401 | return True 402 | 403 | def check_cmms_gender(self, ind_object): 404 | """ 405 | Check if the phenotype is correct. 406 | 407 | Args: 408 | ind_object : An Individuals object 409 | 410 | Yields: 411 | bool : True if phenotype status is correct 412 | False otherwise 413 | """ 414 | ind_id = ind_object.individual_id.split('-') 415 | sex = ind_object.sex 416 | sex_code = int(ind_id[-1][:-1])# Males allways have odd numbers and womans even 417 | if (sex_code % 2 == 0 and sex != 2) or (sex_code % 2 != 0 and sex != 1): 418 | raise WrongGender(ind_object.individual_id, sex, sex_code) 419 | 420 | return True 421 | 422 | def get_models(self, genetic_models): 423 | """ 424 | Check what genetic models that are found and return them as a set. 
425 | 426 | Args: 427 | genetic_models : A string with genetic models 428 | 429 | Yields: 430 | correct_model_names : A set with the correct model names 431 | """ 432 | correct_model_names = set() 433 | genetic_models = genetic_models.split(';') 434 | correct_model_names = set() 435 | for model in genetic_models: 436 | # We need to allow typos 437 | if model in self.legal_ar_hom_names: 438 | model = 'AR_hom' 439 | elif model in self.legal_ar_hom_dn_names: 440 | model = 'AR_hom_dn' 441 | elif model in self.legal_ad_names: 442 | model = 'AD_dn' 443 | elif model in self.legal_compound_names: 444 | model = 'AR_comp' 445 | elif model in self.legal_x_names: 446 | model = 'X' 447 | elif model in self.legal_na_names: 448 | model = 'NA' 449 | else: 450 | self.logger.warning("Incorrect model name: {0}."\ 451 | " Ignoring model.".format(model)) 452 | correct_model_names.add(model) 453 | return correct_model_names 454 | 455 | def to_dict(self): 456 | """ 457 | Return the information from the pedigree file as a dictionary. 458 | family id is key and a list with dictionarys for each individual 459 | as value. 460 | 461 | Returns: 462 | families (dict): A dictionary with the families 463 | """ 464 | 465 | self.logger.debug("Return the information as a dictionary") 466 | families = {} 467 | for family_id in self.families: 468 | family = [] 469 | for individual_id in self.families[family_id].individuals: 470 | individual = self.families[family_id].individuals[individual_id] 471 | family.append(individual.to_json()) 472 | self.logger.debug("Adding individual {0} to family {1}".format( 473 | individual_id, family_id 474 | )) 475 | self.logger.debug("Adding family {0}".format(family_id)) 476 | families[family_id] = family 477 | 478 | return families 479 | 480 | 481 | def to_json(self): 482 | """ 483 | Yield the information from the pedigree file as a json object. 
484 | This is a list with lists that represents families, families have 485 | dictionaries that represents individuals like 486 | [ 487 | [ 488 | { 489 | 'family_id:family_id', 490 | 'id':individual_id, 491 | 'sex':gender_code, 492 | 'phenotype': phenotype_code, 493 | 'mother': mother_id, 494 | 'father': father_id 495 | }, 496 | { 497 | ... 498 | } 499 | ], 500 | [ 501 | 502 | ] 503 | ] 504 | This object can easily be converted to a json object. 505 | 506 | Yields: 507 | the information in json format 508 | """ 509 | #json_families = [] 510 | for family_id in self.families: 511 | #json_families.append(self.families[family_id].to_json()) 512 | yield self.families[family_id].to_json() 513 | #return json.dumps(json_families) 514 | 515 | def to_madeline(self): 516 | """ 517 | Return a generator with the info in madeline format. 518 | 519 | Yields: 520 | An iterator with family info in madeline format 521 | """ 522 | 523 | madeline_header = [ 524 | 'FamilyID', 525 | 'IndividualID', 526 | 'Gender', 527 | 'Father', 528 | 'Mother', 529 | 'Affected', 530 | 'Proband', 531 | 'Consultand', 532 | 'Alive' 533 | ] 534 | 535 | yield '\t'.join(madeline_header) 536 | 537 | for family_id in self.families: 538 | for individual_id in self.families[family_id].individuals: 539 | individual = self.families[family_id].individuals[individual_id] 540 | 541 | yield individual.to_madeline() 542 | 543 | def to_ped(self): 544 | """ 545 | Return a generator with the info in ped format. 
546 | 547 | Yields: 548 | An iterator with the family info in ped format 549 | """ 550 | 551 | ped_header = [ 552 | '#FamilyID', 553 | 'IndividualID', 554 | 'PaternalID', 555 | 'MaternalID', 556 | 'Sex', 557 | 'Phenotype', 558 | ] 559 | 560 | extra_headers = [ 561 | 'InheritanceModel', 562 | 'Proband', 563 | 'Consultand', 564 | 'Alive' 565 | ] 566 | 567 | for individual_id in self.individuals: 568 | individual = self.individuals[individual_id] 569 | for info in individual.extra_info: 570 | if info in extra_headers: 571 | if info not in ped_header: 572 | ped_header.append(info) 573 | 574 | self.logger.debug("Ped headers found: {0}".format( 575 | ', '.join(ped_header) 576 | )) 577 | 578 | 579 | 580 | yield '\t'.join(ped_header) 581 | 582 | for family_id in self.families: 583 | for individual_id in self.families[family_id].individuals: 584 | individual = self.families[family_id].individuals[individual_id].to_json() 585 | ped_info = [] 586 | ped_info.append(individual['family_id']) 587 | ped_info.append(individual['id']) 588 | ped_info.append(individual['father']) 589 | ped_info.append(individual['mother']) 590 | ped_info.append(individual['sex']) 591 | ped_info.append(individual['phenotype']) 592 | 593 | if len(ped_header) > 6: 594 | for header in ped_header[6:]: 595 | ped_info.append(individual['extra_info'].get(header, '.')) 596 | 597 | yield '\t'.join(ped_info) 598 | 599 | 600 | @click.command() 601 | @click.argument('family_file', 602 | nargs=1, 603 | type=click.File(), 604 | metavar=" or '-'" 605 | ) 606 | @click.option('-t', '--family_type', 607 | type=click.Choice(['ped', 'alt', 'cmms', 'mip']), 608 | default='ped', 609 | help='If the analysis use one of the known setups, please specify which one. 
Default is ped' 610 | ) 611 | @click.option('--to_json', 612 | is_flag=True, 613 | help='Print the ped file in json format' 614 | ) 615 | @click.option('--to_madeline', 616 | is_flag=True, 617 | help='Print the ped file in madeline format' 618 | ) 619 | @click.option('--to_ped', 620 | is_flag=True, 621 | help='Print the ped file in ped format with headers' 622 | ) 623 | @click.option('--to_dict', 624 | is_flag=True, 625 | help='Print the ped file in ped format with headers' 626 | ) 627 | @click.option('-o', '--outfile', 628 | type=click.File('a') 629 | ) 630 | @click.option('-l', '--logfile', 631 | type=click.Path(exists=False), 632 | help="Path to log file. If none logging is "\ 633 | "printed to stderr." 634 | ) 635 | @click.option('--loglevel', 636 | type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 637 | 'CRITICAL']), 638 | default='INFO', 639 | help="Set the level of log output." 640 | ) 641 | def cli(family_file, family_type, to_json, to_madeline, to_ped, to_dict, 642 | outfile, logfile, loglevel): 643 | """Cli for testing the ped parser.""" 644 | from pprint import pprint as pp 645 | 646 | my_parser = FamilyParser(family_file, family_type) 647 | 648 | if to_json: 649 | if outfile: 650 | outfile.write(my_parser.to_json()) 651 | else: 652 | print(my_parser.to_json()) 653 | elif to_madeline: 654 | for line in my_parser.to_madeline(): 655 | if outfile: 656 | outfile.write(line + '\n') 657 | else: 658 | print(line) 659 | 660 | elif to_ped: 661 | for line in my_parser.to_ped(): 662 | if outfile: 663 | outfile.write(line + '\n') 664 | else: 665 | print(line) 666 | 667 | elif to_dict: 668 | pp(my_parser.to_dict()) 669 | 670 | 671 | 672 | 673 | if __name__ == '__main__': 674 | from ped_parser import init_log, logger 675 | init_log(logger, loglevel='DEBUG') 676 | cli() 677 | --------------------------------------------------------------------------------