├── .gitignore ├── LICENSE.md ├── README.md ├── ct2json ├── ct2json.py ├── data │ ├── sdtm-ct.json │ └── sdtm-ct.xml └── schema │ ├── controlledterminology-extension.xsd │ ├── controlledterminology-ns.xsd │ ├── controlledterminology1-1-1.xsd │ └── foundation │ ├── ODM1-3-2-foundation.xsd │ ├── ODM1-3-2.xsd │ ├── xlink.xsd │ ├── xml.xsd │ └── xmldsig-core-schema.xsd ├── ct2odm ├── ct2odm.py └── data │ ├── sdtm-ct.txt │ ├── sdtm-ct.xls │ └── sdtm-ct.xml ├── define2-1-to-xlsx ├── README.md ├── codelists.py ├── comments.py ├── data │ ├── define.xml │ ├── defineV21-SDTM.xml │ ├── odmlib-define-metadata.xlsx │ └── odmlib-roundtrip-define.xml ├── datasets.py ├── define2-1-to-xlsx.py ├── dictionaries.py ├── documents.py ├── excel_define_file.py ├── methods.py ├── requirements.txt ├── standards.py ├── study.py ├── value_level.py ├── variables.py └── where_clauses.py ├── define2xls ├── .idea │ ├── .gitignore │ ├── define2xls.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── README.md ├── codelists.py ├── comments.py ├── data │ ├── codelists.csv │ ├── comments.csv │ ├── datasets.csv │ ├── dictionaries.csv │ ├── documents.csv │ ├── methods.csv │ ├── odmlib-define-metadata-clean.xlsx │ ├── odmlib-define-metadata-save.xlsx │ ├── odmlib-define-metadata-temp.xlsx │ ├── odmlib-define-metadata.xlsx │ ├── odmlib-roundtrip-define.xml │ ├── odmlib-rt-test-define.xml │ ├── sdtm-xls-define.xml │ ├── study.csv │ ├── valuelevel.csv │ ├── variables.csv │ └── whereclauses.csv ├── datasets.py ├── define2xls.py ├── dictionaries.py ├── documents.py ├── excel_define_file.py ├── methods.py ├── requirements.txt ├── study.py ├── value_level.py ├── variables.py └── where_clauses.py ├── get_started ├── .idea │ ├── .gitignore │ ├── get_started.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── README.md ├── data │ └── odm_demo.xml ├── get_started.py └── requirements.txt ├── 
library_xml ├── .idea │ ├── .gitignore │ ├── .name │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── library_1_0.iml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── README.md ├── __init__.py ├── data │ ├── library-cdash-2-2.xml │ ├── library-odmlib-cdashig2-2.json │ ├── library-odmlib.json │ ├── library-sdtm-3-4.xml │ └── odmlib.xml ├── library_define_1_0 │ ├── __init__.py │ └── model.py ├── library_odm_1_0 │ ├── __init__.py │ └── model.py ├── library_xml.py ├── requirements.txt └── tests │ └── test_local_library_loader.py ├── merge_odm ├── .idea │ ├── .gitignore │ ├── .name │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── merge_odm.iml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── README.md ├── data │ ├── cdash-odm-source.xml │ ├── cdash-odm-target-clean.xml │ └── cdash-odm-target.xml ├── merge_odm.py └── requirements.txt ├── notebooks ├── data │ ├── cosa_define_demo.xml │ └── cosa_demo.xml ├── first_define.ipynb ├── first_odm.ipynb └── generate_define.ipynb ├── snippets ├── data │ ├── cdash-odm-test.xml │ ├── defineV21-SDTM.xml │ └── simple_create.xml ├── odmlib_first_define.py ├── simple_create_odm.py ├── validate_define.py ├── validate_odm.py └── validate_odm_metadata.py ├── xls2define ├── .idea │ ├── .gitignore │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ └── xls2define.iml ├── CodeLists.py ├── Comments.py ├── Datasets.py ├── Dictionaries.py ├── Documents.py ├── Methods.py ├── README.md ├── Study.py ├── ValueLevel.py ├── Variables.py ├── WhereClauses.py ├── data │ ├── SDTM-Metadata-Worksheet.xlsx │ ├── odmlib-define-metadata.xlsx │ ├── odmlib-roundtrip-define.xml │ └── odmlib-rt-test-define.xml ├── define_object.py ├── odm.py ├── requirements.txt ├── supporting_docs.py └── xls2define.py └── xlsx2define2-1 ├── CodeLists.py ├── Comments.py ├── Datasets.py ├── Dictionaries.py ├── Documents.py ├── Methods.py ├── README.md ├── Standards.py ├── Study.py ├── ValueLevel.py ├── 
Variables.py ├── WhereClauses.py ├── data ├── odmlib-define-metadata.xlsx └── odmlib-roundtrip-define.xml ├── define_object.py ├── odm.py ├── requirements.txt ├── supporting_docs.py └── xlsx2define2-1.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | define2-1-to-xlsx/venv/ 6 | xlsx2define2-1/venv/ 7 | ct2json/venv/ 8 | ct2odm/venv/ 9 | define2xls/venv/ 10 | get_started/venv/ 11 | merge_odm/venv/ 12 | snippets/venv/ 13 | xls2define/venv/ 14 | *.egg-info/ 15 | docs/build/ 16 | odmlib.egg-info/ 17 | define2-1-to-xlsx/data/*.csv 18 | define2xls/data/*.csv 19 | define2-1-to-xlsx/.idea 20 | xlsx2define2-1/.idea 21 | xlsx2define2-1/data/t1d*.* 22 | xlsx2define2-1/data/T1-Dexi*.xlsx 23 | ct2json/.idea 24 | ct2odm/.idea 25 | define2xls/.idea 26 | get_started/.idea 27 | merge_odm/.idea 28 | snippets/.idea 29 | snippets/data/ODM*.xml 30 | snippets/data/t1d-define.xml 31 | notebooks/.idea 32 | notebooks/.ipynb_checkpoints 33 | .gitignore.swp 34 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © 2022 Sam Hume 5 | 6 | Permission is hereby granted, free of charge, to any person 7 | obtaining a copy of this software and associated documentation 8 | files (the “Software”), to deal in the Software without 9 | restriction, including without limitation the rights to use, 10 | copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # odmlib examples 2 | 3 | ## Introduction 4 | The odmlib examples are small applications that demonstrate the use of the odmlib Python package for creating 5 | and processing ODM files, including extensions like Define-XML. The examples are intended to make it easier 6 | to get started using the odmlib package. 7 | 8 | The odmlib package simplifies working with the CDISC ODM data exchange standard and its extensions, such as 9 | Define-XML, in Python. The odmlib package provides an object-oriented interface to working with ODM documents 10 | that simplifies creating and processing them. 11 | 12 | ## Why odmlib? 13 | The odmlib package satisfies my personal interest in working with ODM using an object-oriented 14 | interface in Python. 15 | 16 | ## Getting Started 17 | See the [odmlib repository](https://github.com/swhume/odmlib) to get the odmlib package. Eventually, it may 18 | make its way into PyPi, but for now you'll need to install from the source. The odmlib README provides 19 | instructions for getting started. 20 | 21 | ## Note 22 | Effort will be made to update the odmlib_examples as odmlib and its associated models are updated. If 23 | an example doesn't run correctly, please update to the latest version of odmlib. 
24 | 25 | ## Limitations 26 | The odmlib examples are simple programs intended to demonstrate some of the basic capabilities of odmlib. 27 | The examples are not complete, production-ready applications. 28 | 29 | The odmlib package is still in development. Although it is being actively used on several projects, additional 30 | use and testing may trigger updates or bug fixes. Create an issue in GitHub if you need some help getting 31 | an example to run. Thanks for your patience. -------------------------------------------------------------------------------- /ct2json/ct2json.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import odmlib.loader as LD 3 | import odmlib.odm_loader as OL 4 | import odmlib.odm_parser as P 5 | import xmlschema as XSD 6 | import os 7 | 8 | CT_SCHEMA = "./schema/controlledterminology1-1-1.xsd" 9 | 10 | """ 11 | ct2json.py - an example program using odmlib to read a CT-XML ODM file and convert it to JSON. 
class CT2Json:
    """Generate a CT JSON file from a CT-XML ODM file."""

    def __init__(self, ct_file, json_file, language="en"):
        """
        :param ct_file: str - the path and filename of the CT-XML input file
        :param json_file: str - the path and filename of the JSON file to write; when an existing
            directory is given, the JSON file is named after the CT-XML file and placed in it
        :param language: str - language code; stored on the instance, not used by create()
        """
        self.ct_file = ct_file
        # The command-line default for the JSON target is the directory "./", which cannot be
        # opened for writing; turn a directory into a concrete file name inside it.
        if os.path.isdir(json_file):
            stem = os.path.splitext(os.path.basename(ct_file))[0]
            json_file = os.path.join(json_file, stem + ".json")
        self.json_file = json_file
        self.lang = language

    def create(self):
        """Load the CT-XML file with the odmlib ct_1_1_1 model and write it out as JSON."""
        loader = LD.ODMLoader(OL.XMLODMLoader(model_package="ct_1_1_1", ns_uri="http://ncicb.nci.nih.gov/xml/odm/EVS/CDISC"))
        loader.open_odm_document(self.ct_file)
        ct_odmlib = loader.root()
        ct_odmlib.write_json(self.json_file)


class CTValidator:
    """CT-XML schema validation."""

    def __init__(self, schema, ct_file):
        """
        :param schema: str - the path and filename for the CT-XML schema
        :param ct_file: str - the path and filename for the CT-XML file to validate
        """
        self.schema_file = schema
        self.ct_file = ct_file

    def validate(self):
        """
        Execute the schema validation and report the results.

        :raises ValueError: when the schema file or the CT-XML file cannot be found
        """
        # Fail fast with a clear message before handing missing paths to the schema library.
        self._check_file_existence()
        validator = P.ODMSchemaValidator(self.schema_file)
        try:
            validator.validate_file(self.ct_file)
            print("CT-XML schema validation completed successfully...")
        except XSD.XMLSchemaValidationError as ve:
            # Base class of the children-validation error caught previously, so every kind of
            # schema violation is reported rather than only child-element ordering problems.
            print(f"schema validation errors: {ve}")

    def _check_file_existence(self):
        """Raise ValueError if the schema or the CT-XML file cannot be found."""
        if not self.schema_file or not os.path.isfile(self.schema_file):
            raise ValueError("The schema validate flag is set, but the schema file cannot be found.")
        if not self.ct_file or not os.path.isfile(self.ct_file):
            raise ValueError("The CT-XML file cannot be found.")


def set_cmd_line_args():
    """
    Get the command-line arguments needed to convert the CT-XML input file into JSON.

    :return: the argparse namespace holding the command-line parameters
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-x", "--ct", help="path and file name of CT-XML input file", required=True,
                        dest="ct_file")
    parser.add_argument("-j", "--json", help="path and file to write the generated JSON file to", required=False,
                        dest="json_file", default="./")
    parser.add_argument("-s", "--schema", help="path and file name of CT-XML schema", dest="schema_file",
                        default=CT_SCHEMA)
    parser.add_argument("-v", "--validate", help="schema validate the CT-XML file", default=False, const=True,
                        nargs='?', dest="is_validate")
    parser.add_argument("-l", "--lang", help="language code", default="en", dest="language", required=False)
    args = parser.parse_args()
    return args


def main():
    """Main driver: optionally schema-validate the CT-XML file, then convert it to JSON."""
    args = set_cmd_line_args()
    if args.is_validate:
        validator = CTValidator(args.schema_file, args.ct_file)
        validator.validate()
    ct2json = CT2Json(args.ct_file, args.json_file, args.language)
    ct2json.create()


if __name__ == "__main__":
    main()
/ct2json/schema/controlledterminology-ns.xsd: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | 10 | 11 | The version of the CT-XML standard. 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /ct2json/schema/controlledterminology1-1-1.xsd: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /ct2json/schema/foundation/ODM1-3-2.xsd: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /ct2json/schema/foundation/xlink.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | Comment describing your root element 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /ct2json/schema/foundation/xml.xsd: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | See http://www.w3.org/XML/1998/namespace.html and 9 | http://www.w3.org/TR/REC-xml for information about this namespace. 10 | 11 | This schema document describes the XML namespace, in a form 12 | suitable for import by other schema documents. 13 | 14 | Note that local names in this namespace are intended to be defined 15 | only by the World Wide Web Consortium or its subgroups. 
The 16 | following names are currently defined in this namespace and should 17 | not be used with conflicting semantics by any Working Group, 18 | specification, or document instance: 19 | 20 | base (as an attribute name): denotes an attribute whose value 21 | provides a URI to be used as the base for interpreting any 22 | relative URIs in the scope of the element on which it 23 | appears; its value is inherited. This name is reserved 24 | by virtue of its definition in the XML Base specification. 25 | 26 | lang (as an attribute name): denotes an attribute whose value 27 | is a language code for the natural language of the content of 28 | any element; its value is inherited. This name is reserved 29 | by virtue of its definition in the XML specification. 30 | 31 | space (as an attribute name): denotes an attribute whose 32 | value is a keyword indicating what whitespace processing 33 | discipline is intended for the content of the element; its 34 | value is inherited. This name is reserved by virtue of its 35 | definition in the XML specification. 36 | 37 | Father (in any context at all): denotes Jon Bosak, the chair of 38 | the original XML Working Group. This name is reserved by 39 | the following decision of the W3C XML Plenary and 40 | XML Coordination groups: 41 | 42 | In appreciation for his vision, leadership and dedication 43 | the W3C XML Plenary on this 10th day of February, 2000 44 | reserves for Jon Bosak in perpetuity the XML name 45 | xml:Father 46 | 47 | 48 | 49 | 50 | This schema defines attributes and an attribute group 51 | suitable for use by 52 | schemas wishing to allow xml:base, xml:lang or xml:space attributes 53 | on elements they define. 54 | 55 | To enable this, such a schema must import this schema 56 | for the XML namespace, e.g. as follows: 57 | <schema . . .> 58 | . . . 
59 | <import namespace="http://www.w3.org/XML/1998/namespace" 60 | schemaLocation="http://www.w3.org/2001/03/xml.xsd"/> 61 | 62 | Subsequently, qualified reference to any of the attributes 63 | or the group defined below will have the desired effect, e.g. 64 | 65 | <type . . .> 66 | . . . 67 | <attributeGroup ref="xml:specialAttrs"/> 68 | 69 | will define a type which will schema-validate an instance 70 | element with any of those attributes 71 | 72 | 73 | 74 | In keeping with the XML Schema WG's standard versioning 75 | policy, this schema document will persist at 76 | http://www.w3.org/2001/03/xml.xsd. 77 | At the date of issue it can also be found at 78 | http://www.w3.org/2001/xml.xsd. 79 | The schema document at that URI may however change in the future, 80 | in order to remain compatible with the latest version of XML Schema 81 | itself. In other words, if the XML Schema namespace changes, the version 82 | of this document at 83 | http://www.w3.org/2001/xml.xsd will change 84 | accordingly; the version at 85 | http://www.w3.org/2001/03/xml.xsd will not change. 86 | 87 | 88 | 89 | 90 | 91 | In due course, we should install the relevant ISO 2- and 3-letter 92 | codes as the enumerated possible values . . . 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | See http://www.w3.org/TR/xmlbase/ for 109 | information about this attribute. 
class CT2ODM:
    """Convert a tab-delimited controlled terminology file into a CT-XML ODM file using odmlib."""

    def __init__(self, csv_file, odm_file, standard, package_date):
        """
        :param csv_file: str - path of the tab-delimited terminology input file
        :param odm_file: str - path of the CT-XML file to write
        :param standard: str - standard name used in OIDs and names (e.g. "SDTM")
        :param package_date: str - package date used in OIDs and AsOfDateTime (e.g. "2021-06-25")
        """
        self.csv_file = csv_file
        self.odm_file = odm_file
        self.standard = standard
        self.pkg_date = package_date

    def create(self):
        """
        Read the terminology file row by row, build the ODM tree and write it as XML.

        A codelist row must come before the term rows that belong to it.

        :raises ValueError: when a term row appears before any codelist row
        """
        odm = self._create_odm()
        odm.Study.append(self._create_study())
        odm.Study[0].MetaDataVersion.append(self._create_mdv())
        line_count = 0
        cl_dict = {}
        cl_c_code = ""
        cl = None
        # newline="" lets the csv module do its own newline handling, as the csv docs recommend
        with open(self.csv_file, "r", newline="") as csv_file:
            csv_reader = csv.DictReader(csv_file, delimiter='\t')
            for row in csv_reader:
                if self._is_codelist_row(row):
                    # a new codelist starts: flush the one collected so far
                    if cl_dict and cl_c_code != row["Code"]:
                        self._complete_codelist(odm, cl, cl_dict)
                    # assumes Codelist comes before associated terms
                    cl, cl_dict = self._create_codelist(row)
                    cl_c_code = row["Code"]
                elif cl is None:
                    raise ValueError(
                        f"Term row {row['Code']!r} found before any codelist row in {self.csv_file}."
                    )
                else:
                    cl.EnumeratedItem.append(self._create_enumerated_item(row))
                line_count += 1
        # an input without any codelist rows has nothing left to flush
        if cl is not None:
            self._complete_codelist(odm, cl, cl_dict)
        print(f'Processed {line_count} lines.')
        odm.write_xml(self.odm_file)

    @staticmethod
    def _is_codelist_row(row):
        """Return True when the row describes a codelist rather than a term within a codelist."""
        return bool(row["Code"] and row["Codelist Extensible (Yes/No)"] and not row["Codelist Code"])

    def _create_enumerated_item(self, row):
        """Create an EnumeratedItem, with synonyms, definition and preferred term, from a term row."""
        ei = CT.EnumeratedItem(CodedValue=row["CDISC Submission Value"], ExtCodeID=row["Code"])
        if row["CDISC Synonym(s)"]:
            for synonym in self._get_synonyms(row["CDISC Synonym(s)"]):
                ei.CDISCSynonym.append(CT.CDISCSynonym(_content=synonym))
        ei.CDISCDefinition = CT.CDISCDefinition(_content=row["CDISC Definition"])
        ei.PreferredTerm = CT.PreferredTerm(_content=row["NCI Preferred Term"])
        return ei

    def _complete_codelist(self, odm, cl, cl_dict):
        """Add the codelist-level child elements and append the codelist to the MetaDataVersion."""
        self._update_codelist(cl, cl_dict)
        odm.Study[0].MetaDataVersion[0].CodeList.append(cl)

    def _update_codelist(self, cl, cl_dict):
        """Set submission value, synonym and preferred term on the codelist from the saved row values."""
        cl.CDISCSubmissionValue = CT.CDISCSubmissionValue(_content=cl_dict["sub_val"])
        cl.CDISCSynonym = CT.CDISCSynonym(_content=cl_dict["synonyms"])
        cl.PreferredTerm = CT.PreferredTerm(_content=cl_dict["preferred_term"])

    def _create_codelist(self, row):
        """
        Create a CodeList from a codelist row.

        :param row: dict - one codelist row of the terminology file
        :return: tuple of the CodeList object and a dict of the values applied when it is completed
        """
        cl = CT.CodeList(
            OID="CL." + row["Code"] + "." + row["CDISC Submission Value"],
            Name=row["CDISC Synonym(s)"],
            DataType="text",
            ExtCodeID=row["Code"],
            CodeListExtensible=row["Codelist Extensible (Yes/No)"]
        )
        cl.Description = CT.Description()
        cl.Description.TranslatedText.append(CT.TranslatedText(_content=row["CDISC Definition"], lang="en"))
        cl_dict = {
            "sub_val": row["CDISC Submission Value"],
            "synonyms": row["CDISC Synonym(s)"],
            "preferred_term": row["NCI Preferred Term"]
        }
        return cl, cl_dict

    def _get_synonyms(self, synonyms_string):
        """
        Split a ";"-separated synonym string into a list of stripped synonyms.

        Empty entries (e.g. from a trailing ";") are dropped so no empty synonym elements are created.
        """
        return [synonym.strip() for synonym in synonyms_string.split(";") if synonym.strip()]

    def _create_odm(self):
        """Create the root ODM object for the terminology package and return it."""
        odm = CT.ODM(
            FileOID="CDISC_CT." + self.standard + "." + self.pkg_date,
            AsOfDateTime=self.pkg_date + "T00:00:00",
            CreationDateTime=self._set_datetime(),
            ODMVersion="1.3.2",
            FileType="Snapshot",
            Granularity="Metadata",
            Originator="Sam Hume",
            SourceSystem="NCI Thesaurus",
            SourceSystemVersion=self.pkg_date
        )
        return odm

    def _create_study(self):
        """
        Create the Study ODMLIB object, including its GlobalVariables, and return it.

        :return: odmlib Study object
        """
        study = CT.Study(OID="CDISC_CT." + self.standard + "." + self.pkg_date)
        gv = CT.GlobalVariables()
        gv.StudyName = CT.StudyName(_content="CDISC " + self.standard + " Controlled Terminology")
        gv.StudyDescription = CT.StudyDescription(_content="CDISC " + self.standard + " Controlled Terminology, " + self.pkg_date)
        gv.ProtocolName = CT.ProtocolName(_content="CDISC " + self.standard + " Controlled Terminology")
        study.GlobalVariables = gv
        return study

    def _create_mdv(self):
        """
        Create the MetaDataVersion ODMLIB object and return it.

        :return: odmlib MetaDataVersion object
        """
        mdv = CT.MetaDataVersion(
            OID="CDISC_CT_MetaDataVersion." + self.standard + "." + self.pkg_date,
            Name="CDISC " + self.standard + " Controlled Terminology",
            Description="CDISC " + self.standard + " Controlled Terminology, " + self.pkg_date,
        )
        return mdv

    def _set_datetime(self):
        """Return the current UTC datetime in ISO 8601 format, including the +00:00 offset."""
        # datetime.utcnow() is deprecated; now(timezone.utc) yields the same aware timestamp
        return datetime.datetime.now(datetime.timezone.utc).isoformat()


if __name__ == '__main__':
    ct2odm = CT2ODM(csv_file="./data/sdtm-ct.txt", odm_file="./data/sdtm-ct.xml", standard="SDTM", package_date="2021-06-25")
    ct2odm.create()
The Excel spreadsheet version of the metadata makes it easier for many to edit or create new 6 | content to include in a Define-XML v2.1 file. The companion xlsx2define2-1 program takes the updated spreadsheet and 7 | generates a Define-XML v2.1 file. This example demonstrates some basic odmlib features. 8 | 9 | ## Getting Started 10 | To run define2-1-to-xlsx.py from the command-line: 11 | 12 | `python define2-1-to-xlsx.py -d ./data/sdtm-xls-define.xml -p ./data/` 13 | 14 | The odmlib package must be installed to run define2-1-to-xlsx. See the 15 | [odmlib repository](https://github.com/swhume/odmlib) to get the source code and the latest version of the odmlib 16 | package. You may also install odmlib from PyPi with the understanding that it is still in development so might 17 | not have everything available in the odmlib repository. To install from PyPi: 18 | 19 | `pip install odmlib` 20 | 21 | The odmlib README provides instructions for getting started. 22 | 23 | ## Limitations 24 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib. 25 | The examples are not complete, production-ready applications. However, I'm happy to update these applications to 26 | accommodate new features or bug fixes and will also review pull requests. 27 | 28 | The odmlib package is still in development. 
class CodeLists:
    """Write the CodeList content of a MetaDataVersion to codelists.csv, one row per term."""

    HEADERS = ["OID", "Name", "NCI Codelist Code", "Data Type", "Order", "Term", "NCI Term Code", "Decoded Value",
               "Comment", "IsNonStandard", "StandardOID"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose CodeList entries are exported
        :param data_path: str - directory in which codelists.csv is created
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "codelists.csv")

    def extract(self):
        """Create the CSV file: the header row followed by the rows of every codelist."""
        with open(self.file_name, 'w', newline='') as out:
            sheet = csv.writer(out, dialect="excel")
            sheet.writerow(self.HEADERS)
            for codelist in self.mdv.CodeList:
                # enumerated items take precedence; codelists with neither kind of item write nothing
                if codelist.EnumeratedItem:
                    self._write_enumerated_item_row(codelist, sheet)
                    continue
                if codelist.CodeListItem:
                    self._write_code_list_item_row(codelist, sheet)

    def _write_enumerated_item_row(self, cl, writer):
        """Write one row per EnumeratedItem; the decoded value column is left empty."""
        shared = self._conditional_codelist_content(cl)
        for item in cl.EnumeratedItem:
            order = item.OrderNumber if item.OrderNumber else ""
            term_code = item.Alias[0].Name if item.Alias else ""
            row = [cl.OID, cl.Name, shared["cl_c_code"], cl.DataType, order, item.CodedValue, term_code, ""]
            row += [shared["comment_oid"], shared["is_non_std"], shared["standard_oid"]]
            writer.writerow(row)

    def _write_code_list_item_row(self, cl, writer):
        """Write one row per CodeListItem, including the text of its first decode translation."""
        shared = self._conditional_codelist_content(cl)
        for item in cl.CodeListItem:
            order = item.OrderNumber if item.OrderNumber else ""
            term_code = item.Alias[0].Name if item.Alias else ""
            decoded = item.Decode.TranslatedText[0]._content
            row = [cl.OID, cl.Name, shared["cl_c_code"], cl.DataType, order, item.CodedValue, term_code, decoded]
            row += [shared["comment_oid"], shared["is_non_std"], shared["standard_oid"]]
            writer.writerow(row)

    def _conditional_codelist_content(self, cl):
        """Return the optional codelist-level values, using "" for each one that is not set."""
        return {
            "cl_c_code": cl.Alias[0].Name if cl.Alias else "",
            "comment_oid": cl.CommentOID if cl.CommentOID else "",
            "is_non_std": cl.IsNonStandard if cl.IsNonStandard else "",
            "standard_oid": cl.StandardOID if cl.StandardOID else "",
        }


class Comments:
    """Write the CommentDef content of a MetaDataVersion to comments.csv, one row per comment."""

    HEADERS = ["OID", "Description", "Document", "Pages"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose CommentDef entries are exported
        :param data_path: str - directory in which comments.csv is created
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "comments.csv")

    def extract(self):
        """Create the CSV file: the header row followed by one row per CommentDef."""
        with open(self.file_name, 'w', newline='') as out:
            sheet = csv.writer(out, dialect="excel")
            sheet.writerow(self.HEADERS)
            for comment_def in self.mdv.CommentDef:
                leaf_id, page_refs = "", ""
                doc_refs = comment_def.DocumentRef
                if doc_refs:
                    # only the first document reference and its first page reference are exported
                    first_ref = doc_refs[0]
                    leaf_id = first_ref.leafID
                    if first_ref.PDFPageRef:
                        page_refs = first_ref.PDFPageRef[0].PageRefs
                # collapse line breaks and runs of whitespace into single spaces
                words = comment_def.Description.TranslatedText[0]._content.split()
                sheet.writerow([comment_def.OID, " ".join(words), leaf_id, page_refs])
class Datasets:
    """Write the ItemGroupDef content of a MetaDataVersion to datasets.csv, one row per dataset."""

    HEADERS = ["OID", "Dataset", "Description", "Class", "Structure", "Purpose", "Repeating", "Reference Data", "Comment",
               "IsNonStandard", "StandardOID", "HasNoData"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose ItemGroupDef entries are exported
        :param data_path: str - directory in which datasets.csv is created
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "datasets.csv")

    def extract(self):
        """Create the CSV file: the header row followed by one row per ItemGroupDef."""
        with open(self.file_name, 'w', newline='') as out:
            sheet = csv.writer(out, dialect="excel")
            sheet.writerow(self.HEADERS)
            for dataset in self.mdv.ItemGroupDef:
                # the description column holds the text of the first translation only
                description = dataset.Description.TranslatedText[0]._content
                row = [dataset.OID, dataset.Name, description, dataset.Class.Name]
                row.extend([dataset.Structure, dataset.Purpose, dataset.Repeating, dataset.IsReferenceData])
                row.extend([dataset.CommentOID, dataset.IsNonStandard, dataset.StandardOID, dataset.HasNoData])
                sheet.writerow(row)
cmd-line args: -d ./data/odmlib-roundtrip-define.xml -p ./data/ -v 19 | -s "/home/sam/standards/DefineV211/schema/cdisc-define-2.1/define2-1-0.xsd 20 | """ 21 | 22 | class Define2Xls: 23 | """ generate a metadata spreadsheet from a Define-XML v2.1 file """ 24 | def __init__(self, define_file, excel_path, excel_filename=EXCEL_NAME, language="en"): 25 | self.define_file = define_file 26 | self.data_path = excel_path 27 | self.excel_filename = excel_filename 28 | self.lang = language 29 | self.acrf = "" 30 | 31 | def create(self): 32 | loader = LD.ODMLoader(OL.XMLDefineLoader(model_package="define_2_1", ns_uri="http://www.cdisc.org/ns/def/v2.1")) 33 | loader.open_odm_document(self.define_file) 34 | mdv_odmlib = loader.MetaDataVersion() 35 | study_odmlib = loader.Study() 36 | self._set_acrf(mdv_odmlib) 37 | ws_files = [] 38 | for worksheet in WORKSHEETS: 39 | if worksheet == "Study": 40 | ws = eval(worksheet.lower() + "." + worksheet + "(study_odmlib, mdv_odmlib, self.data_path, self.lang, self.acrf)") 41 | else: 42 | ws = eval(worksheet.lower() + "." 
class DefineValidator:
    """ Define-XML schema validation """
    def __init__(self, schema, define_file):
        """
        :param schema: str - the path and filename for the Define-XML schema
        :param define_file: str - the path and filename for the Define-XML to validate
        """
        self.schema_file = schema
        self.define_file = define_file

    def validate(self):
        """
        execute the schema validation and report the results
        :raises ValueError: when the schema file or the Define-XML file cannot be found
        """
        # fail early with a clear message instead of letting the schema parser trip over a missing file
        self._check_file_existence()
        validator = P.ODMSchemaValidator(self.schema_file)
        try:
            validator.validate_file(self.define_file)
            print("define-XML schema validation completed successfully...")
        except XSD.validators.exceptions.XMLSchemaChildrenValidationError as ve:
            print(f"schema validation errors: {ve}")

    def _check_file_existence(self):
        """ throw an error if the schema or Define-XML file cannot be found """
        # a missing -s argument arrives here as None, which os.path.isfile cannot accept
        if not self.schema_file or not os.path.isfile(self.schema_file):
            raise ValueError("The schema validate flag is set, but the schema file cannot be found.")
        if not self.define_file or not os.path.isfile(self.define_file):
            raise ValueError("The define-xml file cannot be found.")
"--define", help="path and file name of Define-XML v2 input file", required=True, 95 | dest="define_file") 96 | parser.add_argument("-p", "--path", help="path to write the generated Excel file to", required=False, 97 | dest="excel_path", default="./") 98 | parser.add_argument("-e", "--excel", help="Name of Excel file without path", required=False, 99 | dest="excel_filename", default=EXCEL_NAME) 100 | parser.add_argument("-s", "--schema", help="path and file name of Define-XML schema", dest="schema_file") 101 | parser.add_argument("-v", "--validate", help="schema validate the Define-XML file", default=False, const=True, 102 | nargs='?', dest="is_validate") 103 | parser.add_argument("-l", "--lang", help="language code", default="en", dest="language", required=False) 104 | args = parser.parse_args() 105 | return args 106 | 107 | 108 | def main(): 109 | """ main driver method that generates an Excel file using tje Define-XML v2.0 metadata """ 110 | args = set_cmd_line_args() 111 | if args.is_validate: 112 | validator = DefineValidator(args.schema_file, args.define_file) 113 | validator.validate() 114 | d2x = Define2Xls(args.define_file, args.excel_path, args.excel_filename, args.language) 115 | d2x.create() 116 | 117 | 118 | if __name__ == "__main__": 119 | main() 120 | -------------------------------------------------------------------------------- /define2-1-to-xlsx/dictionaries.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | 4 | 5 | class Dictionaries: 6 | HEADERS = ["OID", "Name", "Data Type", "Dictionary", "Version"] 7 | 8 | def __init__(self, odmlib_mdv, data_path): 9 | self.mdv = odmlib_mdv 10 | self.path = data_path 11 | self.file_name = os.path.join(self.path, "dictionaries.csv") 12 | 13 | def extract(self): 14 | with open(self.file_name, 'w', newline='') as f: 15 | writer = csv.writer(f, dialect="excel") 16 | writer.writerow(self.HEADERS) 17 | for cl in self.mdv.CodeList: 18 | if 
class Documents:
    """ write the document leaf elements (ID, title, href) of the MetaDataVersion to documents.csv """
    HEADERS = ["ID", "Title", "Href"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose leaf elements are written out
        :param data_path: str - directory the documents.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "documents.csv")

    def extract(self):
        """ write the header row followed by one row per leaf """
        # the worksheet loader reads these CSV files back as utf8, so write them as UTF-8 explicitly
        with open(self.file_name, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for lf in self.mdv.leaf:
                writer.writerow([lf.ID, lf.title._content, lf.href])
class Methods:
    """ write the MethodDef metadata of the MetaDataVersion to methods.csv """
    HEADERS = ["OID", "Name", "Type", "Description", "Expression Context", "Expression Code", "Document", "Pages"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose MethodDef elements are written out
        :param data_path: str - directory the methods.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "methods.csv")

    def extract(self):
        """ write the header row followed by one row per MethodDef """
        # the worksheet loader reads these CSV files back as utf8, so write them as UTF-8 explicitly
        with open(self.file_name, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for md in self.mdv.MethodDef:
                context = ""
                code = ""
                if md.FormalExpression:
                    # only the first FormalExpression is exported
                    context = md.FormalExpression[0].Context
                    code = md.FormalExpression[0]._content
                leaf_id = ""
                page_refs = ""
                if md.DocumentRef:
                    # only the first DocumentRef is exported
                    leaf_id = md.DocumentRef[0].leafID
                    # a document can be referenced without page references; leave Pages blank in that case
                    if md.DocumentRef[0].PDFPageRef:
                        page_refs = md.DocumentRef[0].PDFPageRef[0].PageRefs
                # collapse line breaks and runs of whitespace so the description fits on one row
                description = " ".join(md.Description.TranslatedText[0]._content.split())
                writer.writerow([md.OID, md.Name, md.Type, description, context, code,
                                 leaf_id, page_refs])
class Standards:
    """ write the Standard elements of the MetaDataVersion to standards.csv """
    HEADERS = ["OID", "Name", "Type", "Publishing Set", "Version", "Status", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose Standards are written out
        :param data_path: str - directory the standards.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "standards.csv")

    def extract(self):
        """ write the header row followed by one row per Standard """
        # the worksheet loader reads these CSV files back as utf8, so write them as UTF-8 explicitly
        with open(self.file_name, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for std in self.mdv.Standards.Standard:
                # attributes that are not set are written as empty cells
                pset = std.PublishingSet if std.PublishingSet else ""
                status = std.Status if std.Status else ""
                comment = std.CommentOID if std.CommentOID else ""
                writer.writerow([std.OID, std.Name, std.Type, pset, std.Version, status, comment])
class ValueLevel:
    """ write the value level metadata (ValueListDef ItemRefs and the ItemDefs they reference) to valuelevel.csv """
    # does not include the IsNonStandard and HasNoData attributes for value level ItemRefs
    HEADERS = ["OID", "Order", "Dataset", "Variable", "ItemOID", "Where Clause", "Data Type", "Length",
               "Significant Digits", "Format", "Mandatory", "Codelist", "Origin Type", "Origin Source", "Pages",
               "Method", "Predecessor", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose ValueListDef elements are written out
        :param data_path: str - directory the valuelevel.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "valuelevel.csv")

    def extract(self):
        """
        write the header row followed by one row per ItemRef of every ValueListDef
        :raises ValueError: when no dataset references the variable a ValueListDef belongs to
        """
        # the worksheet loader reads these CSV files back as utf8, so write them as UTF-8 explicitly
        with open(self.file_name, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for vl in self.mdv.ValueListDef:
                dataset = self._get_dataset_name(vl.OID)
                for ir in vl.ItemRef:
                    # assumes all ItemDefs are referenced by an ItemRef
                    ird = self._load_item_ref(ir)
                    idd = self._load_item_def(ir.ItemOID)
                    writer.writerow([vl.OID, ird["Order"], dataset, idd["Variable"], ir.ItemOID, ird["Where Clause"],
                                     idd["Data Type"], idd["Length"], idd["Significant Digits"], idd["Format"],
                                     ird["Mandatory"], idd["Codelist"], idd["Origin Type"], idd["Origin Source"],
                                     idd["Pages"], ird["Method"], idd["Predecessor"], idd["Comment"]])

    def _get_dataset_name(self, vl_oid):
        """ return the Name of the first ItemGroupDef that references an ItemDef pointing at the given ValueListDef """
        for item in self.mdv.ItemDef:
            if item.ValueListRef and item.ValueListRef.ValueListOID == vl_oid:
                for igd in self.mdv.ItemGroupDef:
                    ir = igd.find("ItemRef", "ItemOID", item.OID)
                    if ir:
                        return igd.Name
        raise ValueError(f"Dataset for ValueListDef {vl_oid} not found in the Define-XML file")

    def _load_item_ref(self, ir):
        """ return the ItemRef attributes used in a value level row, keyed by column name """
        ird = {}
        ird["Order"] = ir.OrderNumber
        ird["Mandatory"] = ir.Mandatory
        ird["Method"] = ir.MethodOID
        ird["Where Clause"] = self._get_where_clause_oid(ir)
        return ird

    def _load_item_def(self, item_oid):
        """ look up the ItemDef by OID and return the attributes used in a value level row, keyed by column name """
        idd = {}
        it = self.mdv.find("ItemDef", "OID", item_oid)
        idd["Variable"] = it.Name
        idd["Data Type"] = it.DataType
        idd["Length"] = it.Length
        idd["Significant Digits"] = it.SignificantDigits
        idd["Format"] = it.DisplayFormat
        idd["Codelist"] = it.CodeListRef.CodeListOID if it.CodeListRef else ""
        # TODO add support for multiple Origins
        idd["Origin Type"] = it.Origin[0].Type if it.Origin else ""
        idd["Origin Source"] = it.Origin[0].Source if it.Origin and it.Origin[0].Source else ""
        idd["Pages"] = it.Origin[0].DocumentRef[0].PDFPageRef[0].PageRefs \
            if it.Origin and it.Origin[0].DocumentRef and it.Origin[0].DocumentRef[0].PDFPageRef else ""
        idd["Predecessor"] = it.Origin[0].Description.TranslatedText[0]._content \
            if it.Origin and it.Origin[0].Type == "Predecessor" else ""
        idd["Comment"] = it.CommentOID if it.CommentOID else ""
        return idd

    def _get_where_clause_oid(self, item):
        """ return the WhereClauseOIDs referenced by the ItemRef joined into a single cell value """
        # NOTE(review): the OIDs are joined with an apostrophe; confirm this is the separator the
        # spreadsheet consumer expects before changing it
        return "'".join([wc.WhereClauseOID for wc in item.WhereClauseRef])
self.path = data_path 13 | self.file_name = os.path.join(self.path, "variables.csv") 14 | 15 | def extract(self): 16 | with open(self.file_name, 'w', newline='') as f: 17 | writer = csv.writer(f, dialect="excel") 18 | writer.writerow(self.HEADERS) 19 | for ig in self.mdv.ItemGroupDef: 20 | for ir in ig.ItemRef: 21 | # assumes all ItemDefs are referenced by an ItemRef 22 | ird = self._load_item_ref(ir) 23 | idd = self._load_item_def(ir.ItemOID) 24 | writer.writerow([idd["OID"], ird["Order"], ig.Name, idd["Variable"], idd["Label"], idd["Data Type"], idd["Length"], 25 | idd["Significant Digits"], idd["Format"], ird["KeySequence"], ird["Mandatory"], 26 | idd["Codelist"], idd["Valuelist"], idd["Origin Type"], idd["Origin Source"], 27 | idd["Pages"], ird["Method"], idd["Predecessor"], ird["Role"], idd["Comment"], 28 | ird["IsNonStandard"], ird["HasNoData"]]) 29 | 30 | def _load_item_ref(self, ir): 31 | ird = {} 32 | ird["Order"] = ir.OrderNumber 33 | ird["Mandatory"] = ir.Mandatory 34 | ird["KeySequence"] = ir.KeySequence 35 | ird["Method"] = ir.MethodOID 36 | ird["Role"] = ir.Role 37 | ird["IsNonStandard"] = ir.IsNonStandard if ir.IsNonStandard else "" 38 | ird["HasNoData"] = ir.HasNoData if ir.HasNoData else "" 39 | return ird 40 | 41 | def _load_item_def(self, item_oid): 42 | idd = {} 43 | it = self.mdv.find("ItemDef", "OID", item_oid) 44 | idd["OID"] = item_oid 45 | idd["Variable"] = it.Name 46 | idd["Data Type"] = it.DataType 47 | idd["Length"] = it.Length 48 | idd["Significant Digits"] = it.SignificantDigits 49 | idd["Format"] = it.DisplayFormat 50 | idd["Label"] = " ".join(it.Description.TranslatedText[0]._content.split()) 51 | idd["Codelist"] = it.CodeListRef.CodeListOID if it.CodeListRef else "" 52 | idd["Valuelist"] = it.ValueListRef.ValueListOID if it.ValueListRef else "" 53 | # TODO add support for multiple Origins 54 | idd["Origin Type"] = it.Origin[0].Type if it.Origin else "" 55 | idd["Origin Source"] = it.Origin[0].Source if it.Origin and 
class WhereClauses:
    """ write the WhereClauseDef range checks of the MetaDataVersion to whereclauses.csv """
    HEADERS = ["OID", "Dataset", "Variable", "Comparator", "Value", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose WhereClauseDef elements are written out
        :param data_path: str - directory the whereclauses.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "whereclauses.csv")

    def extract(self):
        """
        write the header row followed by one row per RangeCheck of every WhereClauseDef
        :raises ValueError: when the dataset or ItemDef for a RangeCheck ItemOID cannot be found
        """
        # the worksheet loader reads these CSV files back as utf8, so write them as UTF-8 explicitly
        with open(self.file_name, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for wc in self.mdv.WhereClauseDef:
                comment_oid = wc.CommentOID if wc.CommentOID else ""
                for rc in wc.RangeCheck:
                    dataset = self._get_dataset_name(rc.ItemOID)
                    variable_name = self._get_variable_name(rc.ItemOID)
                    value = self._load_check_values(rc)
                    # TODO fix the multiple level RC with join
                    writer.writerow([wc.OID, dataset, variable_name, rc.Comparator, value, comment_oid])

    def _get_dataset_name(self, item_oid):
        """ return the Name of the first ItemGroupDef that has an ItemRef for the given ItemOID """
        for igd in self.mdv.ItemGroupDef:
            ir = igd.find("ItemRef", "ItemOID", item_oid)
            if ir:
                return igd.Name
        raise ValueError(f"Dataset for ItemRef {item_oid} not found in the Define-XML file")

    def _get_variable_name(self, item_oid):
        """ return the Name of the ItemDef with the given OID """
        item = self.mdv.find("ItemDef", "OID", item_oid)
        if item:
            return item.Name
        raise ValueError(f"ItemDef for ItemRef {item_oid} not found in the Define-XML file")

    def _load_check_values(self, rc):
        """ return the CheckValue contents of a RangeCheck as one comma-separated string; empty values stay as blanks """
        return ",".join([cv._content if cv._content else "" for cv in rc.CheckValue])
-------------------------------------------------------------------------------- 1 | # define2xls 2 | 3 | ## Introduction 4 | Use the define2-1-to-xlsx example instead of this one. The Define-XML v2.1 examples are getting more use and testing in 5 | creating study Define-XML files, so they're getting updated more frequently. 6 | 7 | The define2xls program is an odmlib example application that generates an Excel spreadsheet that contains the content 8 | of a Define-XML v2.0 file. The Excel spreadsheet version of the metadata makes it easier for many to edit or create new content 9 | to include in a Define-XML v2.0 file. The companion xls2define program takes the updated spreadsheet and generates a 10 | Define-XML file. This example demonstrates some basic odmlib 11 | features. 12 | 13 | ## Getting Started 14 | To run define2xls.py from the command-line: 15 | 16 | `python define2xls.py -d ./data/sdtm-xls-define.xml -p ./data/` 17 | 18 | The odmlib package must be installed to run define2xls. See the [odmlib repository](https://github.com/swhume/odmlib) 19 | to get the odmlib package. Eventually, it may make its way into PyPI, but for now you'll need to install from the 20 | source. The odmlib README provides instructions for getting started. 21 | 22 | ## Limitations 23 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib. 24 | The examples are not complete, production-ready applications. 25 | 26 | The odmlib package is still in development. Although odmlib supports all of ODM, more work remains 27 | to complete all features for processing ClinicalData. The initial focus has been on getting 28 | the metadata sections complete. 
class CodeLists:
    """ export the enumerated and decoded code lists of a Define-XML v2.0 MetaDataVersion to codelists.csv """
    HEADERS = ["OID", "Name", "NCI Codelist Code", "Data Type", "Order", "Term", "NCI Term Code", "Decoded Value"]

    def __init__(self, odmlib_mdv, data_path):
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "codelists.csv")

    def extract(self):
        """ write the header row, then the term rows of every code list that has enumerated or coded items """
        with open(self.file_name, 'w', newline='') as csv_out:
            out = csv.writer(csv_out, dialect="excel")
            out.writerow(self.HEADERS)
            for code_list in self.mdv.CodeList:
                # enumerated items take precedence; code lists with neither kind of item are skipped
                if code_list.EnumeratedItem:
                    self._write_enumerated_item_row(code_list, out)
                    continue
                if code_list.CodeListItem:
                    self._write_code_list_item_row(code_list, out)

    def _write_enumerated_item_row(self, cl, writer):
        """ write one row per EnumeratedItem; the decoded value column is left empty """
        list_c_code = cl.Alias[0].Name if cl.Alias else ""
        for term in cl.EnumeratedItem:
            order = term.OrderNumber if term.OrderNumber else ""
            term_c_code = term.Alias[0].Name if term.Alias else ""
            row = [cl.OID, cl.Name, list_c_code, cl.DataType, order, term.CodedValue, term_c_code, ""]
            writer.writerow(row)

    def _write_code_list_item_row(self, cl, writer):
        """ write one row per CodeListItem including the first translated decode """
        list_c_code = cl.Alias[0].Name if cl.Alias else ""
        for term in cl.CodeListItem:
            order = term.OrderNumber if term.OrderNumber else ""
            term_c_code = term.Alias[0].Name if term.Alias else ""
            decoded = term.Decode.TranslatedText[0]._content
            row = [cl.OID, cl.Name, list_c_code, cl.DataType, order, term.CodedValue, term_c_code, decoded]
            writer.writerow(row)
class Comments:
    """ export the CommentDef elements of a Define-XML v2.0 MetaDataVersion to comments.csv """
    HEADERS = ["OID", "Description", "Document", "Pages"]

    def __init__(self, odmlib_mdv, data_path):
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "comments.csv")

    def extract(self):
        """ write the header row followed by one row per CommentDef """
        with open(self.file_name, 'w', newline='') as csv_out:
            out = csv.writer(csv_out, dialect="excel")
            out.writerow(self.HEADERS)
            for comment_def in self.mdv.CommentDef:
                document, pages = "", ""
                if comment_def.DocumentRef:
                    # only the first document reference, and its first page reference, are exported
                    first_ref = comment_def.DocumentRef[0]
                    document = first_ref.leafID
                    if first_ref.PDFPageRef:
                        pages = first_ref.PDFPageRef[0].PageRefs
                # squeeze line breaks and repeated blanks out of the comment text
                words = comment_def.Description.TranslatedText[0]._content.split()
                out.writerow([comment_def.OID, " ".join(words), document, pages])
See additional documentation in the Reviewer's Guide, Split Datasets Section.",LF.ReviewersGuide, 10 | COM.DSDECOD,CRF controlled terminology was mapped to match CDISC controlled terminology.,, 11 | COM.EG.VISITNUM,Assigned from the TV domain based on the VISIT,, 12 | COM.EGEVAL,Equal to INVESTIGATOR for CRF data,, 13 | COM.EGPOS,Equal to SUPINE,, 14 | COM.EGSPID,ECG parameter ordering variable,, 15 | COM.IDVAR,Name of the variables for the related records.,, 16 | COM.IDVARVAL,Value of identifying variable described in IDVAR.,, 17 | COM.IE.VISITNUM,Assigned from the TV domain based on the VISIT,, 18 | COM.LBREFID,Accession number,, 19 | COM.MHBODSYS,Assigned for Medical History but not Psychiatric History,, 20 | COM.MHENRF,CRF controlled terminology was mapped to match CDISC controlled terminology.,, 21 | COM.PE.VISITNUM,Assigned from the TV domain based on the VISIT,, 22 | COM.QS.VISITNUM,Assigned from the TV domain based on the VISIT,, 23 | COM.RELTYPE,All values are null since this is used only when identifying a dataset-level relationship.,, 24 | COM.STUDY.DATA,The data submitted only includes subjects in the USA since other sites did not enroll any subjects.,, 25 | COM.SUBJECTDATA-JOIN-DM,"Join any Subject Level dataset with the Demographics dataset based on [IG.datasetname]IT.USUBJID = [IG.DM]IT.USUBJID, assuming 'IG.datasetname' is the OID of the ItemGroupDef that defines the subject-level dataset to be joined with the Demographics dataset.",, 26 | COM.SUPPQS.QVAL.RTRINIT,QSMM-CRF Page 13; QSCS-CRF Pages 14; QSCG-CRF Page 17,, 27 | COM.VS.VISITNUM,Assigned from the TV domain based on the VISIT,, 28 | COM.VSSTRESU,Standard units consistent with CDISC controlled terminology,, 29 | -------------------------------------------------------------------------------- /define2xls/data/datasets.csv: -------------------------------------------------------------------------------- 1 | Dataset,Description,Class,Structure,Purpose,Repeating,Reference Data,Comment 2 | 
AE,Adverse Events,EVENTS,One record per adverse event per subject,Tabulation,Yes,No, 3 | CM,Concomitant Medications,INTERVENTIONS,One record per recorded medication occurrence or constant-dosing interval per subject,Tabulation,Yes,No, 4 | DA,Drug Accountability,FINDINGS,One record per drug accountability finding per subject,Tabulation,Yes,No, 5 | DM,Demographics,SPECIAL PURPOSE,One record per subject,Tabulation,No,No,COM.DOMAIN.DM 6 | DS,Disposition,EVENTS,One record per disposition status or protocol milestone per subject,Tabulation,Yes,No, 7 | EG,ECG Test Results,FINDINGS,One record per ECG observation per visit per subject,Tabulation,Yes,No, 8 | EX,Exposure,INTERVENTIONS,One record per constant dosing interval per subject,Tabulation,Yes,No, 9 | IE,Inclusion/Exclusion Criteria Not Met,FINDINGS,One record per inclusion/exclusion criterion not met per subject,Tabulation,Yes,No, 10 | LB,Laboratory Tests Results,FINDINGS,One record per analyte per visit per subject,Tabulation,Yes,No, 11 | MH,Medical History,EVENTS,One record per medical history event per subject,Tabulation,Yes,No, 12 | PE,Physical Examination,FINDINGS,One record per body system or abnormality per visit per subject,Tabulation,Yes,No, 13 | QSCG,Questionnaire-QSCG,FINDINGS,One record per questionnaire per question per visit per subject,Tabulation,Yes,No,COM.DOMAIN.QS 14 | QSCS,Questionnaire-QSCS,FINDINGS,One record per questionnaire per question per visit per subject,Tabulation,Yes,No,COM.DOMAIN.QS 15 | QSMM,Questionnaire-QSMM,FINDINGS,One record per questionnaire per question per visit per subject,Tabulation,Yes,No,COM.DOMAIN.QS 16 | RELREC,Related Records,RELATIONSHIP,"One record per related record, group of records or dataset",Tabulation,Yes,No, 17 | SC,Subject Characteristics,FINDINGS,One record per characteristic per subject,Tabulation,No,No, 18 | SE,Subject Elements,SPECIAL PURPOSE,One record per actual Element per subject,Tabulation,Yes,No, 19 | SUPPAE,Supplemental Qualifiers for 
AE,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 20 | SUPPCM,Supplemental Qualifiers for CM,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 21 | SUPPDM,Supplemental Qualifiers for DM,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 22 | SUPPEG,Supplemental Qualifiers for EG,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 23 | SUPPEX,Supplemental Qualifiers for EX,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 24 | SUPPLB,Supplemental Qualifiers for LB,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 25 | SUPPQSCG,Supplemental Qualifiers for QSCG,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 26 | SUPPQSCS,Supplemental Qualifiers for QSCS,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 27 | SUPPQSMM,Supplemental Qualifiers for QSMM,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 28 | SUPPVS,Supplemental Qualifiers for VS,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No, 29 | SV,Subject Visits,SPECIAL PURPOSE,One record per actual visit per subject,Tabulation,Yes,No, 30 | TA,Trial Arms,TRIAL DESIGN,One record per planned Element per Arm,Tabulation,No,Yes, 31 | TE,Trial Elements,TRIAL DESIGN,One record per planned Element,Tabulation,No,Yes, 32 | TI,Trial Inclusion/Exclusion Criteria,TRIAL DESIGN,One record per I/E criterion,Tabulation,No,Yes, 33 | TS,Trial Summary,TRIAL DESIGN,One record per trial summary parameter value,Tabulation,No,Yes, 34 | TV,Trial Visits,TRIAL DESIGN,One record per planned Visit per Arm,Tabulation,No,Yes, 35 | VS,Vital Signs,FINDINGS,One record per vital sign measurement per visit per 
subject,Tabulation,Yes,No, 36 | -------------------------------------------------------------------------------- /define2xls/data/dictionaries.csv: -------------------------------------------------------------------------------- 1 | OID,Name,Data Type,Dictionary,Version 2 | CL.AEDICT_F,Adverse Event Dictionary,text,MEDDRA,8.0 3 | CL.DRUGDICT_F,Drug Dictionary,text,WHODRUG,200204 4 | CL.ISO3166,ISO3166,text,ISO3166, 5 | -------------------------------------------------------------------------------- /define2xls/data/documents.csv: -------------------------------------------------------------------------------- 1 | ID,Title,Href 2 | LF.ReviewersGuide,Reviewers Guide,reviewersguide.pdf 3 | LF.ComplexAlgorithms,Complex Algorithms,complexalgorithms.pdf 4 | LF.blankcrf,Annotated Case Report Form,blankcrf.pdf 5 | -------------------------------------------------------------------------------- /define2xls/data/odmlib-define-metadata-clean.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/define2xls/data/odmlib-define-metadata-clean.xlsx -------------------------------------------------------------------------------- /define2xls/data/odmlib-define-metadata-save.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/define2xls/data/odmlib-define-metadata-save.xlsx -------------------------------------------------------------------------------- /define2xls/data/odmlib-define-metadata-temp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/define2xls/data/odmlib-define-metadata-temp.xlsx -------------------------------------------------------------------------------- 
class Datasets:
    """ extract the ItemGroupDef (dataset) metadata from a MetaDataVersion into datasets.csv """
    HEADERS = ["Dataset", "Description", "Class", "Structure", "Purpose", "Repeating", "Reference Data", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose ItemGroupDef list is read by extract()
        :param data_path: directory the datasets.csv file is written to
        """
        self.mdv, self.path = odmlib_mdv, data_path
        self.file_name = os.path.join(data_path, "datasets.csv")

    def extract(self):
        """ write the header row followed by one row per ItemGroupDef """
        with open(self.file_name, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file, dialect="excel")
            csv_writer.writerow(self.HEADERS)
            csv_writer.writerows(self._as_row(item_group) for item_group in self.mdv.ItemGroupDef)

    @staticmethod
    def _as_row(item_group):
        """ return the column values for one ItemGroupDef in HEADERS order """
        # only the first TranslatedText of the description is exported
        description = item_group.Description.TranslatedText[0]._content
        return [
            item_group.Name,
            description,
            item_group.Class,
            item_group.Structure,
            item_group.Purpose,
            item_group.Repeating,
            item_group.IsReferenceData,
            item_group.CommentOID,
        ]
class Define2Xls:
    """ generate a metadata spreadsheet from a Define-XML v2.0 file """
    def __init__(self, define_file, excel_path, excel_filename=EXCEL_NAME, language="en"):
        """
        :param define_file: str - the path and filename of the Define-XML file to convert
        :param excel_path: str - the directory the worksheet CSV files and the Excel file are written to
        :param excel_filename: str - the name of the Excel file without a path
        :param language: str - the language code passed to the Study worksheet extractor
        """
        self.define_file = define_file
        self.data_path = excel_path
        self.excel_filename = excel_filename
        self.lang = language

    def create(self):
        """ load the Define-XML file, extract each worksheet, and combine the extracts into the Excel file """
        loader = LD.ODMLoader(OL.XMLDefineLoader())
        loader.open_odm_document(self.define_file)
        mdv_odmlib = loader.MetaDataVersion()
        study_odmlib = loader.Study()
        ws_files = []
        for worksheet in WORKSHEETS:
            ws_class = self._worksheet_class(worksheet)
            # the Study worksheet is the only one that also needs the Study object and the language
            if worksheet == "Study":
                ws = ws_class(study_odmlib, mdv_odmlib, self.data_path, self.lang)
            else:
                ws = ws_class(mdv_odmlib, self.data_path)
            ws.extract()
            ws_files.append(ws.file_name)
        self._write_excel(ws_files)

    @staticmethod
    def _worksheet_class(worksheet):
        """
        look up the extractor class for a worksheet without evaluating generated source code
        :param worksheet: str - worksheet name; the class has this name and its module is imported in this file
            under the lower-cased name
        :return: the extractor class
        """
        module = globals()[worksheet.lower()]
        return getattr(module, worksheet)

    def _write_excel(self, ws_files):
        """ write the extracted worksheet files into a single Excel workbook, one tab per worksheet """
        excel = EX.ExcelDefineFile(ws_files, WORKSHEETS, self.data_path, self.excel_filename)
        excel.create_excel()
def main():
    """ main driver method that generates an Excel file using the Define-XML v2.0 metadata """
    cmd_args = set_cmd_line_args()
    # schema validation is optional and only reports its result; the conversion runs either way
    if cmd_args.is_validate:
        DefineValidator(cmd_args.schema_file, cmd_args.define_file).validate()
    converter = Define2Xls(
        cmd_args.define_file,
        cmd_args.excel_path,
        cmd_args.excel_filename,
        cmd_args.language,
    )
    converter.create()
class Documents:
    """ extract the leaf (document) entries from a MetaDataVersion into documents.csv """
    HEADERS = ["ID", "Title", "Href"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose leaf list is read by extract()
        :param data_path: directory the documents.csv file is written to
        """
        self.mdv, self.path = odmlib_mdv, data_path
        self.file_name = os.path.join(data_path, "documents.csv")

    def extract(self):
        """ write the header row followed by one row (ID, title text, href) per leaf """
        with open(self.file_name, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file, dialect="excel")
            csv_writer.writerow(self.HEADERS)
            csv_writer.writerows(
                [leaf.ID, leaf.title._content, leaf.href] for leaf in self.mdv.leaf
            )
class Methods:
    """ extract the MethodDef elements from a MetaDataVersion into methods.csv """
    HEADERS = ["OID", "Name", "Type", "Description", "Expression Context", "Expression Code", "Document", "Pages"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose MethodDef list is read by extract()
        :param data_path: directory the methods.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "methods.csv")

    def extract(self):
        """ write the header row followed by one row per MethodDef """
        with open(self.file_name, 'w', newline='') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for md in self.mdv.MethodDef:
                context, code = self._formal_expression(md)
                leaf_id, page_refs = self._document_ref(md)
                # collapse line breaks and runs of whitespace so the description fits on one spreadsheet line
                description = " ".join(md.Description.TranslatedText[0]._content.split())
                writer.writerow([md.OID, md.Name, md.Type, description, context, code,
                                 leaf_id, page_refs])

    @staticmethod
    def _formal_expression(md):
        """ return (context, code) of the first FormalExpression, or empty strings when there is none """
        if not md.FormalExpression:
            return "", ""
        expression = md.FormalExpression[0]
        return expression.Context, expression._content

    @staticmethod
    def _document_ref(md):
        """ return (leafID, page refs) of the first DocumentRef, or empty strings when there is none """
        if not md.DocumentRef:
            return "", ""
        doc_ref = md.DocumentRef[0]
        # a DocumentRef may carry no PDFPageRef; indexing it unguarded raised IndexError
        page_refs = doc_ref.PDFPageRef[0].PageRefs if doc_ref.PDFPageRef else ""
        return doc_ref.leafID, page_refs
odmlib_mdv 11 | self.path = data_path 12 | self.language = language 13 | self.file_name = os.path.join(self.path, "study.csv") 14 | 15 | def extract(self): 16 | print(f"Study OID: {self.study.GlobalVariables.StudyName}") 17 | with open(self.file_name, 'w', newline='') as f: 18 | writer = csv.writer(f, dialect="excel") 19 | writer.writerow(self.HEADERS) 20 | writer.writerow(["StudyName", self.study.GlobalVariables.StudyName]) 21 | writer.writerow(["StudyDescription", self.study.GlobalVariables.StudyDescription]) 22 | writer.writerow(["ProtocolName", self.study.GlobalVariables.ProtocolName]) 23 | writer.writerow(["StandardName", self.mdv.StandardName]) 24 | writer.writerow(["StandardVersion", self.mdv.StandardVersion]) 25 | writer.writerow(["Language", self.language]) 26 | -------------------------------------------------------------------------------- /define2xls/value_level.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | 4 | 5 | class ValueLevel: 6 | HEADERS = ["Order", "Dataset", "Variable", "Where Clause", "Data Type", "Length", "Significant Digits", "Format", 7 | "Mandatory", "Codelist", "Origin", "Pages", "Method", "Predecessor", "Comment"] 8 | 9 | def __init__(self, odmlib_mdv, data_path): 10 | self.mdv = odmlib_mdv 11 | self.path = data_path 12 | self.file_name = os.path.join(self.path, "valuelevel.csv") 13 | 14 | def extract(self): 15 | with open(self.file_name, 'w', newline='') as f: 16 | writer = csv.writer(f, dialect="excel") 17 | writer.writerow(self.HEADERS) 18 | for vl in self.mdv.ValueListDef: 19 | for ir in vl.ItemRef: 20 | # assumes all ItemDefs are referenced by an ItemRef 21 | ird = self._load_item_ref(ir) 22 | idd = self._load_item_def(ir.ItemOID) 23 | # using OID to get dataset is a hack, but dataset column only used to create the OID for VLD 24 | dataset = vl.OID.split(".")[1] 25 | writer.writerow([ird["Order"], dataset, idd["Variable"], ird["Where Clause"], idd["Data Type"], 
class Variables:
    """ extract the variables referenced by each ItemGroupDef of a MetaDataVersion into variables.csv """
    HEADERS = ["Order", "Dataset", "Variable", "Label", "Data Type", "Length", "Significant Digits", "Format",
               "KeySequence", "Mandatory", "CodeList", "Valuelist", "Origin", "Pages", "Method", "Predecessor",
               "Role", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object providing ItemGroupDef and the find() lookup for ItemDefs
        :param data_path: directory the variables.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "variables.csv")

    def extract(self):
        """ write the header row followed by one row per ItemRef of every ItemGroupDef """
        # keys of the merged ItemRef/ItemDef values, listed in the column order of HEADERS
        column_keys = ("Order", "Dataset", "Variable", "Label", "Data Type", "Length", "Significant Digits",
                       "Format", "KeySequence", "Mandatory", "Codelist", "Valuelist", "Origin", "Pages",
                       "Method", "Predecessor", "Role", "Comment")
        with open(self.file_name, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file, dialect="excel")
            csv_writer.writerow(self.HEADERS)
            for item_group in self.mdv.ItemGroupDef:
                for item_ref in item_group.ItemRef:
                    # assumes all ItemDefs are referenced by an ItemRef
                    values = {"Dataset": item_group.Name}
                    values.update(self._load_item_ref(item_ref))
                    values.update(self._load_item_def(item_ref.ItemOID))
                    csv_writer.writerow([values[key] for key in column_keys])

    def _load_item_ref(self, ir):
        """ return the ItemRef attributes used in a variable row, keyed by column """
        return {
            "Order": ir.OrderNumber,
            "Mandatory": ir.Mandatory,
            "KeySequence": ir.KeySequence,
            "Method": ir.MethodOID,
            "Role": ir.Role,
        }

    def _load_item_def(self, item_oid):
        """ look up the ItemDef with the given OID and return its values for a variable row, keyed by column """
        item = self.mdv.find("ItemDef", "OID", item_oid)
        origin = item.Origin
        return {
            "Variable": item.Name,
            "Data Type": item.DataType,
            "Length": item.Length,
            "Significant Digits": item.SignificantDigits,
            "Format": item.DisplayFormat,
            # collapse line breaks and runs of whitespace in the label
            "Label": " ".join(item.Description.TranslatedText[0]._content.split()),
            "Codelist": "" if item.CodeListRef is None else item.CodeListRef.CodeListOID,
            "Valuelist": item.ValueListRef.ValueListOID if item.ValueListRef else "",
            "Origin": origin.Type if origin else "",
            # pages come from the first PDFPageRef of the first DocumentRef, when both exist
            "Pages": origin.DocumentRef[0].PDFPageRef[0].PageRefs
            if origin.DocumentRef and origin.DocumentRef[0].PDFPageRef else "",
            "Predecessor": origin.Description.TranslatedText[0]._content if origin.Type == "Predecessor" else "",
            "Comment": item.CommentOID or "",
        }
class WhereClauses:
    """ extract the WhereClauseDef elements from a MetaDataVersion into whereclauses.csv """
    HEADERS = ["OID", "Dataset", "Variable", "Comparator", "Value", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose WhereClauseDef list is read by extract()
        :param data_path: directory the whereclauses.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "whereclauses.csv")

    def extract(self):
        """ write the header row followed by one row per RangeCheck of every WhereClauseDef """
        with open(self.file_name, 'w', newline='') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for wc in self.mdv.WhereClauseDef:
                # using OID to get dataset is a hack, but dataset column only used to create the OID for def:ItemOID
                dataset = wc.OID.split(".")[1]
                comment_oid = wc.CommentOID if wc.CommentOID else ""
                for rc in wc.RangeCheck:
                    value = self._load_check_values(rc)
                    variable_name = self._extract_variable_name(rc.ItemOID)
                    writer.writerow([wc.OID, dataset, variable_name, rc.Comparator, value, comment_oid])

    def _load_check_values(self, rc):
        """ return the contents of all CheckValue elements of a RangeCheck as one comma-separated string """
        return ",".join(cv._content for cv in rc.CheckValue)

    def _extract_variable_name(self, item_oid):
        """ return the text after the last "." of an ItemOID, which is used as the variable name """
        return item_oid.split(".")[-1]
# odmlib examples

## Introduction
The odmlib examples are small applications that demonstrate the use of the odmlib Python package for creating
and processing ODM files, including extensions like Define-XML. The examples are intended to make it easier
to get started using the odmlib package.

The odmlib package simplifies working with the CDISC ODM data exchange standard and its extensions, such as
Define-XML, in Python. The odmlib package provides an object-oriented interface to working with ODM documents
that simplifies creating and processing them.

## Why odmlib?
The odmlib package satisfies my personal interest in working with ODM using an object-oriented
interface in Python.
## Getting Started
See the [odmlib repository](https://github.com/swhume/odmlib) to get the odmlib package. Eventually, it may
make its way into PyPi, but for now you'll need to install from the source. The odmlib README provides
instructions for getting started.

## Limitations
The odmlib examples are simple programs intended to demonstrate some of the basic capabilities of odmlib.
The examples are not complete, production-ready applications.

The odmlib package is still in development. Although odmlib supports all of ODM, more work remains
to complete all features for processing ClinicalData. The initial focus has been on getting
the metadata sections complete.
-------------------------------------------------------------------------------- /library_xml/.idea/.name: -------------------------------------------------------------------------------- 1 | library_1_0 -------------------------------------------------------------------------------- /library_xml/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /library_xml/.idea/library_1_0.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | -------------------------------------------------------------------------------- /library_xml/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /library_xml/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /library_xml/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /library_xml/README.md: -------------------------------------------------------------------------------- 1 | # library_xml 2 | 3 | ## Introduction 4 | library_xml retrieves a standard from the CDISC Library as Library-XML (an ODM media type) and loads it into odmlib 5 | for processing. This example program includes the models for Library-XML based on ODM (e.g. CDASHIG) and Define-XML 6 | (e.g. SDTMIG). Library-XML shows how to use external odmlib models (ODM extensions) and can be used as a method for 7 | implementing other extensions. 
That is, this example includes the Library-XML models to be used with odmlib.
Library-XML also could be enhanced to process standards retrieved from the CDISC Library using the odm+xml media type.

This example application uses the CDISC Library API. In order to run the application you will need to create an
account and use your own API key.

## Getting Started
To run library_xml.py from the command-line to retrieve SDTMIG v3.4:

`python library_xml.py -d -e "/mdr/sdtmig/3-4" -k e5a7d2b9bg1a4066ae4b25133a091574`

The -d indicates that the standard retrieved uses the Define-XML model for Library-XML. You will need to replace the
-k value with your own CDISC Library API Key. The endpoint to retrieve is specified in the -e value. Since no -f was
provided, the default output filename is used.

Or, to run it to retrieve CDASHIG v2.2:

`python library_xml.py -e "/mdr/cdashig/2-2" -f library-odmlib-cdashig2-2.json -k e5a7d2b9bg1a4066ae4b25133a091574`

In this example, the output filename was specified in the value of -f.

The odmlib package must be installed to run library_xml.py. See the
[odmlib repository](https://github.com/swhume/odmlib) to install the odmlib source code and latest features.
The odmlib package can also be installed from PyPi with the understanding that it is still in development
so might not have everything available in the odmlib repository. It can be installed from PyPi using:

`pip install odmlib`

The odmlib README provides instructions for getting started.

## Limitations
The odmlib examples are basic programs intended to introduce programmers to the basic capabilities of odmlib.
The examples are not complete, production-ready applications. However, I'm happy to update these applications
to accommodate new features or bug fixes and will also review pull requests.
41 | 42 | The Library-XML program demonstrates the use of the Library-XML extension and could be enhanced to address real 43 | use cases. 44 | 45 | To use the CDISC Library API you will need to create an account and use your credentials to authenticate. 46 | 47 | The odmlib package is still in development. -------------------------------------------------------------------------------- /library_xml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/library_xml/__init__.py -------------------------------------------------------------------------------- /library_xml/library_define_1_0/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/library_xml/library_define_1_0/__init__.py -------------------------------------------------------------------------------- /library_xml/library_odm_1_0/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/library_xml/library_odm_1_0/__init__.py -------------------------------------------------------------------------------- /library_xml/library_xml.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | import argparse 4 | import odmlib.ns_registry as NS 5 | import odmlib.define_loader as DL 6 | import odmlib.odm_loader as OL 7 | 8 | """ 9 | Example Cmd-line Args: 10 | SDTMIG v3.4: -d -e "/mdr/sdtmig/3-4" -k e5a7d2b9bg1a4066ae4b25133a091574 11 | CDASHIG v2.2: -e "/mdr/cdashig/2-2" -f library-odmlib-cdashig2-2.json -k e5a7d2b9bg1a4066ae4b25133a091574 12 | NOTE: you will need to replace the -k arg with our own CDISC Library API key 13 | """ 14 | 15 | 16 | def 
def load_odmlib(endpoint, filename, model_package, ns, api_key):
    """
    request a standard from the CDISC Library as odm+xml, load it with odmlib, and save it as JSON
    :param endpoint: str - the CDISC Library API endpoint to retrieve, appended to the API base URL
    :param filename: str - the path and filename of the JSON file to write
    :param model_package: str - the Library-XML model package; a name containing "define" selects the Define-XML loader
    :param ns: the namespace registry passed to the loader when the document is created from the response text
    :param api_key: str - the CDISC Library API key sent in the api-key request header
    """
    base_url = "https://library.cdisc.org/api"
    library_xml_ns = "http://www.cdisc.org/ns/library-xml/v1.0"
    headers = {"Accept": "application/odm+xml", "User-Agent": "crawler", "api-key": api_key}
    r = requests.get(base_url + endpoint, headers=headers)
    if r.status_code == 200:
        loader_class = DL.XMLDefineLoader if "define" in model_package else OL.XMLODMLoader
        loader = loader_class(model_package=model_package, ns_uri=library_xml_ns, local_model=True)
        loader.create_document_from_string(r.text, ns)
        odm = loader.load_odm()
        write_odm_as_json(odm, filename)
    elif r.status_code == 406:
        # status_code is an int; the previous comparison with the string "406" could never match
        print(f"{endpoint} is not available from CDISC Library as odm+xml")
    else:
        print(f"HTTPError {r.status_code} for url {base_url + endpoint}")
class TestLocalLibraryLoader(unittest.TestCase):
    """ load the sample Library-XML files with the local odmlib model packages and check the loaded content """

    LIBRARY_XML_NS = "http://www.cdisc.org/ns/library-xml/v1.0"
    DEFINE_NS = "http://www.cdisc.org/ns/def/v2.1"
    ODM_NS = "http://www.cdisc.org/ns/odm/v1.3"

    @staticmethod
    def _data_file(file_name):
        """ return the path of a sample file in the data directory that sits next to the tests directory """
        # resolved from this file's location so the tests do not depend on the current working directory
        import os
        tests_dir = os.path.dirname(os.path.realpath(__file__))
        return os.path.join(os.path.dirname(tests_dir), "data", file_name)

    def _load(self, loader_class, model_package, file_name):
        """ parse a sample Library-XML file with the given loader class and return the loaded ODM object """
        ns = NS.NamespaceRegistry(prefix="mdr", uri=self.LIBRARY_XML_NS)
        with open(self._data_file(file_name), "r", encoding="utf-8") as f:
            odm_string = f.read()
        loader = loader_class(model_package=model_package, ns_uri=self.LIBRARY_XML_NS, local_model=True)
        loader.create_document_from_string(odm_string, ns)
        return loader.load_odm()

    def _load_sdtmig(self):
        """ load the SDTMIG v3.4 sample using the local Define-XML based model """
        NS.NamespaceRegistry(prefix="def", uri=self.DEFINE_NS)
        return self._load(DL.XMLDefineLoader, "library_define_1_0", "library-sdtm-3-4.xml")

    def _load_cdashig(self):
        """ load the CDASHIG v2.2 sample using the local ODM based model """
        NS.NamespaceRegistry(prefix="odm", uri=self.ODM_NS, is_default=True)
        return self._load(OL.XMLODMLoader, "library_odm_1_0", "library-cdash-2-2.xml")

    def test_odmlib_sdtmig(self):
        odm = self._load_sdtmig()
        self.assertEqual(odm.FileOID, "ODM.SDTMIGv3.4.2021-11-29")
        self.assertEqual(odm.LibraryXMLVersion, "1.0.0")
        self.assertEqual(odm.Context, "Other")
        mdv = odm.Study[0].MetaDataVersion
        self.assertEqual(mdv.DatePublished, "2021-11-29")
        self.assertEqual(mdv.Standards[0].Name, "SDTMIG v3.4")
        self.assertEqual(mdv.ItemGroupDef[1].OID, "IGD.CM")
        self.assertEqual(mdv.ItemGroupDef[1].Class.Name, "INTERVENTIONS")
        self.assertEqual(mdv.ItemDef[0].CDISCNotes.TranslatedText[0]._content, "Unique identifier for a study.")
        self.assertEqual(mdv.ItemDef[0].CDISCNotes.TranslatedText[0].lang, "en")
        self.assertEqual(mdv.ItemDef[1].SubmissionDataType, "Char")
        it = mdv.find("ItemDef", "OID", "IT.DS.DSDECOD")
        self.assertEqual(it.Name, "DSDECOD")
        self.assertEqual(it.AltCodeListRef[0].CodeListOID, "CL.C114118")
        self.assertEqual(it.AltCodeListRef[1].CodeListOID, "CL.C150811")

    def test_odmlib_cdashig(self):
        odm = self._load_cdashig()
        self.assertEqual(odm.FileOID, "ODM.CDASHIGv2.2.2021-09-28")
        self.assertEqual(odm.LibraryXMLVersion, "1.0.0")
        mdv = odm.Study[0].MetaDataVersion
        self.assertEqual(mdv.DatePublished, "2021-09-28")
        self.assertEqual(mdv.ItemGroupDef[1].OID, "IGD.CM")
        self.assertEqual(mdv.ItemGroupDef[1].Class.Name, "INTERVENTIONS")
        mapping_instructions = mdv.ItemDef[0].MappingInstructions.TranslatedText[0]._content.replace("\n", "")
        # collapse runs of whitespace so the comparison does not depend on how the XML text was wrapped
        self.assertEqual(" ".join(mapping_instructions.split()),
                         "Maps directly to the SDTMIG variable listed in the SDTMIG Target column.")
        self.assertEqual(mdv.ItemDef[0].Definition.TranslatedText[0]._content, "A unique identifier for a study.")
        self.assertEqual(mdv.ItemDef[1].SubmissionDataType, "Char")
        it = mdv.find("ItemDef", "OID", "IT.AG.AGSCAT")
        self.assertEqual(it.Name, "AGSCAT")
        self.assertEqual(it.Core, "O")

    def test_oid_checks(self):
        odm = self._load_sdtmig()
        oid_checker = OID.OIDRef()
        odm.verify_oids(oid_checker)
        self.assertTrue(oid_checker.check_oid_refs())
        orphans = oid_checker.check_unreferenced_oids()
        # the Standard OID is defined in the sample file but nothing references it
        self.assertDictEqual(orphans, {'STD.SDTMIGv3.4': 'StandardOID'})

    def test_oid_checks_skip(self):
        odm = self._load_sdtmig()
        oid_checker = OID.OIDRef(skip_attrs=["StandardOID"], skip_elems=["Standard"])
        odm.verify_oids(oid_checker)
        self.assertTrue(oid_checker.check_oid_refs())
        orphans = oid_checker.check_unreferenced_oids()
        print(f"Orphans: {orphans}")
        self.assertDictEqual(orphans, {})
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /merge_odm/README.md: -------------------------------------------------------------------------------- 1 | # merge_odm 2 | 3 | ## Introduction 4 | The merge_odm program is an odmlib example application that generates a target ODM file with a CRF 5 | moved from a source ODM file. The merge_odm application merges a form from a source ODM file, such as might be 6 | used as a CRF library, into a separate target ODM file. This example demonstrates some basic odmlib 7 | features. 8 | 9 | ## Getting Started 10 | To run merge_odm.py from the command-line: `python merge_odm.py` 11 | 12 | The application expects a source and a target XML file in a data directory that exists in the same path as the 13 | merge_odm.py application. 14 | 15 | The odmlib package must be installed to run merge_odm. See the [odmlib repository](https://github.com/swhume/odmlib) 16 | to get the odmlib package. Eventually, it may make its way into PyPI, but for now you'll need to install from the 17 | source. The odmlib README provides instructions for getting started. 18 | 19 | ## Limitations 20 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib. 21 | The examples are not complete, production-ready applications. 22 | 23 | The odmlib package is still in development. Although odmlib supports all of ODM, more work remains 24 | to complete all features for processing ClinicalData. The initial focus has been on getting 25 | the metadata sections complete. 
class MergeODM:
    def __init__(self, source_file, target_file, form_oid):
        """ simple merge application that generates a target ODM file with a CRF moved from a source ODM file """
        self.source_file = source_file
        self.target_file = target_file
        self.form_oid = form_oid

    def merge(self):
        """ load the source and target ODM files, merge the form into the target, and write the target file """
        source_loader = LD.ODMLoader(OL.XMLODMLoader())
        source_loader.open_odm_document(self.source_file)
        source_mdv = source_loader.MetaDataVersion()
        target_loader = LD.ODMLoader(OL.XMLODMLoader())
        target_loader.open_odm_document(self.target_file)
        target_root = target_loader.root()
        self._merge_form_def(source_mdv, target_root.Study[0].MetaDataVersion[0])
        self._write_target_odm(target_root)

    def _find_in_source(self, source_mdv, element_type, oid):
        """
        look up an element in the source MetaDataVersion and fail with a clear message when it is absent
        :param source_mdv: source MetaDataVersion object providing find()
        :param element_type: name of the element list to search, e.g. "FormDef"
        :param oid: OID of the element to find
        :return: the element found in the source MetaDataVersion
        :raises ValueError: when the source has no element of that type with that OID
        """
        element = source_mdv.find(element_type, "OID", oid)
        if not element:
            raise ValueError(f"{element_type} with OID {oid} not found in source file {self.source_file}")
        return element

    def _merge_form_def(self, source_mdv, target_mdv):
        """ add the FormDef identified by form_oid to the target, then merge everything the form references """
        vs_form = self._find_in_source(source_mdv, "FormDef", self.form_oid)
        if self._element_does_not_exist(target_mdv, vs_form.OID, "FormDef"):
            target_mdv.FormDef.append(vs_form)
        # referenced elements are merged even when the parent is already in the target; every level checks
        # for an existing OID first, so repeating the merge never adds duplicates
        self._merge_item_group_def(source_mdv, target_mdv, vs_form)

    def _merge_item_group_def(self, source_mdv, target_mdv, form):
        """ add each ItemGroupDef referenced by the form to the target along with its items """
        for igr in form.ItemGroupRef:
            igd = self._find_in_source(source_mdv, "ItemGroupDef", igr.ItemGroupOID)
            if self._element_does_not_exist(target_mdv, igd.OID, "ItemGroupDef"):
                target_mdv.ItemGroupDef.append(igd)
            self._merge_items(source_mdv, target_mdv, igd)

    def _merge_items(self, source_mdv, target_mdv, igd):
        """ add each ItemDef referenced by the item group to the target along with its method and code list """
        for itr in igd.ItemRef:
            item = self._find_in_source(source_mdv, "ItemDef", itr.ItemOID)
            if self._element_does_not_exist(target_mdv, item.OID, "ItemDef"):
                target_mdv.ItemDef.append(item)
            self._merge_method(source_mdv, target_mdv, itr)
            self._merge_codelist(source_mdv, target_mdv, item)

    def _merge_method(self, source_mdv, target_mdv, itr):
        """ add the MethodDef referenced by the ItemRef, if any, to the target """
        if itr.MethodOID:
            method = self._find_in_source(source_mdv, "MethodDef", itr.MethodOID)
            if self._element_does_not_exist(target_mdv, method.OID, "MethodDef"):
                target_mdv.MethodDef.append(method)

    def _merge_codelist(self, source_mdv, target_mdv, item):
        """ add the CodeList referenced by the ItemDef, if any, to the target """
        if item.CodeListRef:
            codelist = self._find_in_source(source_mdv, "CodeList", item.CodeListRef.CodeListOID)
            if self._element_does_not_exist(target_mdv, codelist.OID, "CodeList"):
                target_mdv.CodeList.append(codelist)

    def _write_target_odm(self, target_root):
        """ write the updated target ODM back to the target file """
        target_root.write_xml(self.target_file)

    def _element_does_not_exist(self, mdv, oid, element_type):
        """ return True when the MetaDataVersion has no element of element_type with the given OID """
        return not mdv.find(element_type, "OID", oid)
-------------------------------------------------------------------------------- 1 | 2 | COSA Demo StudyDemo to get started with odmlibCOSA odmlib Demonstration -------------------------------------------------------------------------------- /notebooks/generate_define.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md\n" 8 | } 9 | }, 10 | "source": [ 11 | "## Generating a Define-XML v2.1 File\n", 12 | "\n", 13 | "This notebook demonstrates how to run the odmlib example program xlsx2define2-1.py. This example program takes a\n", 14 | "spreadsheet containing Define-XML v2.1 metadata and generates a Define-XML file. The program runs on the command-line\n", 15 | "and provides a number of options.\n", 16 | "\n", 17 | "A number of odmlib example programs, including xlsx2define2-1.py, are located in GitHub at\n", 18 | "https://github.com/swhume/odmlib_examples\n", 19 | "\n", 20 | "The example programs are open-source using the MIT license.\n", 21 | "\n", 22 | "### Install odmlib and other libraries\n", 23 | "Before you can run the odmlib examples, if you haven't already installed odmlib you'll need to do that.\n", 24 | "```\n", 25 | "pip install odmlib\n", 26 | "```\n", 27 | "The xlsx2define2-1.py application also requires xmlschema and openpyxl to be installed using pip in the same\n", 28 | "way that odmlib was installed above.\n", 29 | "\n", 30 | "### Use the example metadata spreadsheet\n", 31 | "\n", 32 | "The first time you run this, use the provided example metadata spreadsheet to test it. The\n", 33 | "odmlib-define-metadata.xlsx spreadsheet contains the metadata to generate a Define-XML v2.1 file.\n", 34 | "\n", 35 | "### Run the program to generate the Define-XML file\n", 36 | "\n", 37 | "This example program runs using a command-line interface. 
# Copyright 2022 Sam Hume. Licensed under the MIT-0 license https://opensource.org/licenses/MIT-0
"""
This is the code presented at the PHUSE US Connect 2022 and described in paper PAP_OS01.
The purpose of this code is to demonstrate using odmlib to create and process a very simple Define-XML v2.1 file.
NOTE: In places where paths are referenced, you will need to update them to reflect your system.
"""
import datetime

import odmlib.define_2_1.model as DEFINE
from odmlib import define_loader as DL
from odmlib import loader as LD
from odmlib import odm_parser as P

# timezone-aware UTC timestamp; datetime.utcnow() is deprecated and returns a naive datetime
current_datetime = datetime.datetime.now(datetime.timezone.utc).isoformat()
odm = DEFINE.ODM(FileOID="DEF.COSA.DEMO",
                 AsOfDateTime=current_datetime,
                 CreationDateTime=current_datetime,
                 ODMVersion="1.3.2",
                 FileType="Snapshot",
                 Originator="Sam Hume",
                 SourceSystem="odmlib",
                 SourceSystemVersion="0.1.4",
                 Context="Other")

study = DEFINE.Study(OID="ST.DEFINE.COSA.001")
study.GlobalVariables.StudyName = DEFINE.StudyName(_content="TEST Define-XML ItemGroupDef")
study.GlobalVariables.StudyDescription = DEFINE.StudyDescription(_content="ItemGroupDef 001")
study.GlobalVariables.ProtocolName = DEFINE.ProtocolName(_content="Define-XML ItemGroupDef")
odm.Study = study

mdv = DEFINE.MetaDataVersion(OID="MDV.COSA.IGD.001", Name="ItemGroupDefDemo001",
                             Description="ItemGroupDef COSA Demo", DefineVersion="2.1.0")

mdv.Standards.Standard.append(DEFINE.Standard(OID="STD.1", Name="SDTMIG", Type="IG", Version="3.2", Status="Final"))
mdv.Standards.Standard.append(DEFINE.Standard(OID="STD.2", Name="CDISC/NCI", Type="CT", PublishingSet="SDTM",
                                              Version="2021-12-17", Status="Final"))

igd = DEFINE.ItemGroupDef(OID="IG.VS",
                          Name="VS",
                          Repeating="Yes",
                          Domain="VS",
                          SASDatasetName="VS",
                          IsReferenceData="No",
                          Purpose="Tabulation",
                          ArchiveLocationID="LF.VS",
                          Structure="One record per vital sign measurement per visit per subject",
                          StandardOID="STD.1",
                          IsNonStandard="Yes",
                          HasNoData="Yes")

igd.Description.TranslatedText.append(DEFINE.TranslatedText(_content="Vital Signs", lang="en"))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.STUDYID", Mandatory="Yes", OrderNumber=1, KeySequence=1))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.VS.DOMAIN", Mandatory="Yes", OrderNumber=2))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.USUBJID", Mandatory="Yes", OrderNumber=3, KeySequence=2))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.VS.VSSEQ", Mandatory="Yes", OrderNumber=4))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.VS.VSTESTCD", Mandatory="Yes", OrderNumber=5, KeySequence=3))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.VS.VSTEST", Mandatory="Yes", OrderNumber=6))

# deliberately omit ItemOID to show the error reported when creating an invalid ItemRef
try:
    ir = DEFINE.ItemRef(Mandatory="Yes", OrderNumber=1)
except ValueError as ve:
    print(f"Error creating ItemRef: {ve}")

igd.Class = DEFINE.Class(Name="FINDINGS")

odm.Study.MetaDataVersion = mdv
odm.Study.MetaDataVersion.ItemGroupDef.append(igd)

# update the path to reflect your system
odm.write_xml(odm_file="./data/cosa_define_demo.xml")

odm.write_json(odm_file="./data/cosa_define_demo.json")

# update the path to reflect your system
with open("./data/cosa_define_demo.xml", 'r') as file:
    cosa_odm = file.read()
print(cosa_odm)

# replace the path below with the path to your Define-XML v2.1 schema
schema_file = "/home/sam/standards/DefineV211/schema/cdisc-define-2.1/define2-1-0.xsd"

validator = P.ODMSchemaValidator(schema_file)
try:
    # update the path to reflect your system
    validator.validate_file("./data/cosa_define_demo.xml")
    print("define-XML schema validation completed successfully...")
except P.OdmlibSchemaValidationError as ve:
    print(f"schema validation errors: {ve}")

# load the generated Define-XML file back into odmlib objects
loader = LD.ODMLoader(DL.XMLDefineLoader(model_package="define_2_1", ns_uri="http://www.cdisc.org/ns/def/v2.1"))
loader.open_odm_document("./data/cosa_define_demo.xml")

odm = loader.load_odm()
print(f"Study OID is {odm.Study.OID}")
print(f"Study Name is {odm.Study.GlobalVariables.StudyName}")
print(f"Study Description is {odm.Study.GlobalVariables.StudyDescription}")
print(f"Protocol Name is {odm.Study.GlobalVariables.ProtocolName}")

cosa_dict = odm.to_dict()
print(cosa_dict)

cosa_json = odm.to_json()
sed.FormRef.append(ODM.FormRef(FormOID="ODM.F.DM", Mandatory="Yes", OrderNumber=1))
root.Study[0].MetaDataVersion[0].StudyEventDef.append(sed)

# create a FormDef
fd = ODM.FormDef(OID="ODM.F.DM", Name="Demographics", Repeating="No")
fd.ItemGroupRef.append(ODM.ItemGroupRef(ItemGroupOID="ODM.IG.DM", Mandatory="Yes", OrderNumber=2))
# the form belongs in the FormDef list so that the FormRef in the StudyEventDef resolves to it
root.Study[0].MetaDataVersion[0].FormDef.append(fd)

# create an ItemGroupDef
igd = ODM.ItemGroupDef(OID="ODM.IG.DM", Name="Demographics", Repeating="No")
igd.ItemRef.append(ODM.ItemRef(ItemOID="ODM.IT.DM.BRTHYR", Mandatory="Yes"))
root.Study[0].MetaDataVersion[0].ItemGroupDef.append(igd)

# create an ItemDef
itd = ODM.ItemDef(OID="ODM.IT.DM.BRTHYR", Name="Birth Year", DataType="integer")
itd.Description = ODM.Description()
itd.Description.TranslatedText.append(ODM.TranslatedText(_content="Year of the subject's birth", lang="en"))
itd.Question = ODM.Question()
itd.Question.TranslatedText.append(ODM.TranslatedText(_content="Birth Year", lang="en"))
itd.Alias.append(ODM.Alias(Context="CDASH", Name="BRTHYR"))
itd.Alias.append(ODM.Alias(Context="SDTM", Name="BRTHDTC"))
root.Study[0].MetaDataVersion[0].ItemDef.append(itd)

# save the new ODM document to a file
root.write_xml(ODM_FILE)
def find_unreferenced_oids(mdv):
    """
    report the OIDs that are defined in the document but never referenced (orphans)
    :param mdv: odmlib object providing verify_oids(); main() passes the ODM root element
    """
    oid_checker = OID.OIDRef()
    # verify_oids loads the OID definitions and references into the checker
    mdv.verify_oids(oid_checker)
    orphans = oid_checker.check_unreferenced_oids()
    # orphans are definitions without a reference, not references without a definition
    print(f"found {len(orphans)} unreferenced OIDs")
    if orphans:
        print(f"Orphaned OIDs: {orphans}")
def verify_element_order(mdv):
    """
    check the order of the elements and print the outcome
    :param mdv: odmlib object providing verify_order(), which raises ValueError when the order is wrong
    """
    try:
        mdv.verify_order()
    except ValueError as order_error:
        outcome = f"Error verifying element order in MetaDataVersion: {order_error}"
    else:
        outcome = "MetaDataVersion element order is verified"
    print(outcome)
def find_unreferenced_oids(mdv):
    """
    report the OIDs that are defined in the document but never referenced (orphans)
    :param mdv: odmlib object providing verify_oids(); main() passes the ODM root element
    """
    oid_checker = OID.OIDRef()
    # verify_oids loads the OID definitions and references into the checker
    mdv.verify_oids(oid_checker)
    orphans = oid_checker.check_unreferenced_oids()
    # orphans are definitions without a reference, not references without a definition
    print(f"found {len(orphans)} unreferenced OIDs")
    if orphans:
        print(f"Orphaned OIDs: {orphans}")
class CodeLists(define_object.DefineObject):
    """ create a Define-XML v2.0 CodeList element object """
    def __init__(self):
        super().__init__()
        self.igd = None

    def create_define_objects(self, sheet, objects, lang):
        """
        parse the Excel sheet and create a odmlib objects to return in the objects dictionary
        :param sheet: Excel worksheet object providing max_column and iter_rows (the openpyxl worksheet
            interface, judging by the calls made here)
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["CodeList"] = []
        cl_c_code = ""
        cl_name = ""
        cl = None
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            # when this is a new code list the names will not be the same
            if row_content["Name"] != cl_name:
                # save the code list that was being built before starting the next one
                self._add_previous_codelist_to_objects(cl_c_code, cl, objects)
                cl = self._create_codelist_object(row_content)
                cl_c_code = row_content.get("NCI Codelist Code")
                cl_name = row_content.get("Name")
            # a term with a decode becomes a CodeListItem, a term without one an EnumeratedItem
            if row_content["Decoded Value"]:
                cl_item = self._create_codelistitem_object(row_content)
                cl.CodeListItem.append(cl_item)
            else:
                en_item = self._create_enumerateditem_object(row_content)
                cl.EnumeratedItem.append(en_item)
        # the last code list has no following row to trigger the save
        self._add_previous_codelist_to_objects(cl_c_code, cl, objects)

    def _add_previous_codelist_to_objects(self, cl_c_code, cl, objects):
        """
        finish creating a codelist by adding an Alias if a c-code exists and adding the object to the list of
        codelists
        :param cl_c_code: codelist c-code
        :param cl: odmlib codelist object, or None when no code list has been started yet
        :param objects: dictionary of odmlib objects created from the Excel input file and updated in this method
        """
        # nothing to save before the first code list is created or when the worksheet has no data rows
        if cl is None:
            return
        # if the code list has an associated c-code add it to the code list as an Alias element
        if cl_c_code:
            alias = DEFINE.Alias(Context="nci:ExtCodeID", Name=cl_c_code)
            cl.Alias.append(alias)
        # add the code list to the list of code list objects
        objects["CodeList"].append(cl)

    def _create_codelist_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib CodeList object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: CodeList odmlib object
        """
        cl = DEFINE.CodeList(OID=row["OID"], Name=row["Name"], DataType=row["Data Type"])
        return cl

    def _create_enumerateditem_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib EnumeratedItem object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: EnumeratedItem odmlib object
        """
        attr = {"CodedValue": row["Term"]}
        if row.get("Order"):
            attr["OrderNumber"] = row["Order"]
        en_item = DEFINE.EnumeratedItem(**attr)
        if row.get("NCI Term Code"):
            alias = DEFINE.Alias(Context="nci:ExtCodeID", Name=row["NCI Term Code"])
            en_item.Alias.append(alias)
        return en_item

    def _create_codelistitem_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib CodeListItem object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: CodeListItem odmlib object
        """
        attr = {"CodedValue": row["Term"]}
        if row.get("Order"):
            attr["OrderNumber"] = row["Order"]
        cl_item = DEFINE.CodeListItem(**attr)
        decode = DEFINE.Decode()
        # use the xml:lang passed to create_define_objects instead of a fixed "en"
        tt = DEFINE.TranslatedText(_content=row["Decoded Value"], lang=self.lang)
        decode.TranslatedText.append(tt)
        cl_item.Decode = decode
        if row.get("NCI Term Code"):
            alias = DEFINE.Alias(Context="nci:ExtCodeID", Name=row["NCI Term Code"])
            cl_item.Alias.append(alias)
        return cl_item
class Comments(define_object.DefineObject):
    """ create a Define-XML v2.0 CommentDef element object """
    def __init__(self):
        super().__init__()
        self.lookup_oid = None
        self.igd = None

    def create_define_objects(self, sheet, objects, lang):
        """
        read every data row of the Comments worksheet and store the resulting CommentDef objects
        :param sheet: Excel worksheet object providing max_column and iter_rows()
        :param objects: dictionary of odmlib objects; the "CommentDef" entry is replaced by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        num_cols = self.sheet.max_column
        header = self.load_header(num_cols)
        comment_defs = []
        objects["CommentDef"] = comment_defs
        # row 1 holds the column headers, so the data starts on row 2
        for values in sheet.iter_rows(min_row=2, min_col=1, max_col=num_cols, values_only=True):
            comment_defs.append(self._create_commentdef_object(self.load_row(values, header)))

    def _create_commentdef_object(self, row):
        """
        build one CommentDef from a Comments worksheet row
        :param row: Comments worksheet row values as a dictionary
        :return: a CommentDef odmlib object with a Description and, if a Document is given, a DocumentRef
        """
        comment_def = DEFINE.CommentDef(OID=row["OID"])
        text = DEFINE.TranslatedText(_content=row["Description"], lang=self.lang)
        comment_def.Description = DEFINE.Description()
        comment_def.Description.TranslatedText.append(text)
        if not row.get("Document"):
            return comment_def
        self._add_document(row, comment_def)
        return comment_def

    def _add_document(self, row, com):
        """
        attach a DocumentRef built from a Comments worksheet row to the CommentDef
        :param row: Comments worksheet row values as a dictionary
        :param com: odmlib CommentDef object that gets updated with a DocumentRef object
        """
        doc_ref = DEFINE.DocumentRef(leafID=row["Document"])
        pages = row.get("Pages")
        if pages:
            doc_ref.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=row["Pages"], Type="NamedDestination"))
        com.DocumentRef.append(doc_ref)
class Datasets(define_object.DefineObject):
    """ create a Define-XML v2.0 ItemGroupDef element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang):
        """
        parse each row in the Excel sheet and create odmlib objects to return in the objects dictionary
        :param sheet: Excel worksheet object providing max_column and iter_rows()
        :param objects: dictionary of odmlib objects; the "ItemGroupDef" entry is replaced by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        num_cols = self.sheet.max_column
        header = self.load_header(num_cols)
        objects["ItemGroupDef"] = []
        # row 1 holds the column headers, so the data starts on row 2
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=num_cols, values_only=True):
            row_content = self.load_row(row, header)
            itg = self._create_itemgroupdef_object(row_content)
            objects["ItemGroupDef"].append(itg)

    def _create_itemgroupdef_object(self, row):
        """
        use the values from the Dataset worksheet row to create a ItemGroupDef odmlib object
        :param row: Datasets worksheet row values as a dictionary
        :return: odmlib ItemGroupDef object
        """
        dataset = row["Dataset"]
        oid = self.generate_oid(["IG", dataset])
        # Variables._create_leaf_objects builds the leaf ID with generate_oid(["LF", <dataset name>]),
        # which upper-cases it; build ArchiveLocationID the same way so the reference always resolves
        archive_location_id = self.generate_oid(["LF", dataset])
        attr = {"OID": oid, "Name": dataset, "Repeating": row["Repeating"], "Domain": dataset,
                "SASDatasetName": dataset, "IsReferenceData": row["Reference Data"], "Purpose": row["Purpose"],
                "Class": row["Class"], "Structure": row["Structure"],
                "ArchiveLocationID": archive_location_id}
        if row.get("Comment"):
            attr["CommentOID"] = row["Comment"]
        igd = DEFINE.ItemGroupDef(**attr)
        tt = DEFINE.TranslatedText(_content=row["Description"], lang=self.lang)
        igd.Description = DEFINE.Description()
        igd.Description.TranslatedText.append(tt)
        return igd
class Methods(define_object.DefineObject):
    """ create a Define-XML v2.0 MethodDef element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang):
        """
        parse the Excel sheet and create a odmlib objects to return in the objects dictionary
        :param sheet: Excel worksheet object providing max_column and iter_rows()
        :param objects: dictionary of odmlib objects; the "MethodDef" entry is replaced by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        num_cols = self.sheet.max_column
        header = self.load_header(num_cols)
        objects["MethodDef"] = []
        # row 1 holds the column headers, so the data starts on row 2
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=num_cols, values_only=True):
            row_content = self.load_row(row, header)
            item = self._create_methoddef_object(row_content)
            objects["MethodDef"].append(item)

    def _create_methoddef_object(self, row):
        """
        use the values from the Methods worksheet row to create a MethodDef odmlib object
        :param row: Methods worksheet row values as a dictionary
        :return: a MethodDef odmlib object
        """
        attr = {"OID": row["OID"], "Name": row["Name"], "Type": row["Type"]}
        method = DEFINE.MethodDef(**attr)
        tt = DEFINE.TranslatedText(_content=row["Description"], lang=self.lang)
        method.Description = DEFINE.Description()
        method.Description.TranslatedText.append(tt)
        if row.get("Expression Context"):
            method.FormalExpression.append(
                DEFINE.FormalExpression(Context=row["Expression Context"], _content=row["Expression Code"]))
        if row.get("Document"):
            self._add_document(row, method)
        return method

    def _add_document(self, row, method):
        """
        creates a DocumentRef object using a row from the Methods Worksheet
        :param row: Methods worksheet row values as a dictionary
        :param method: odmlib MethodDef object that gets updated with a DocumentRef object
        """
        dr = DEFINE.DocumentRef(leafID=row["Document"])
        # a document may be referenced without page references; only add a PDFPageRef when the
        # Pages cell is populated (same rule Comments._add_document applies)
        if row.get("Pages"):
            pdf = DEFINE.PDFPageRef(PageRefs=row["Pages"], Type="NamedDestination")
            dr.PDFPageRef.append(pdf)
        method.DocumentRef.append(dr)
-------------------------------------------------------------------------------- 1 | # xls2define 2 | 3 | ## Introduction 4 | Use the xlsx2define2-1 example instead of this one. The Define-XML v2.1 examples are getting more use and testing 5 | creating study Define-XML files, so they're getting updated more frequently. 6 | 7 | The xls2define program is an odmlib example application that generates a Define-XML v2.0 file from 8 | an Excel spreadsheet that contains the study metadata needed to create the Define-XML file. The Excel 9 | spreadsheet version of the metadata makes it easier for many to edit or create new content to include in a 10 | Define-XML v2.0 file. The companion define2xls program takes the generated Define-XML file and creates 11 | a spreadsheet using the metadata. This example demonstrates some basic odmlib features. 12 | 13 | ## Getting Started 14 | To run xls2define.py from the command-line: 15 | 16 | `python xls2define.py -e ./data/odmlib-define-metadata.xlsx -d ./data/odmlib-roundtrip-define.xml` 17 | 18 | The odmlib package must be installed to run xls2define. See the 19 | [odmlib repository](https://github.com/swhume/odmlib) to get the odmlib package. Eventually, it 20 | may make its way into PyPI, but for now you'll need to install from the source. The odmlib 21 | README provides instructions for getting started. 22 | 23 | ## Limitations 24 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib. 25 | The examples are not complete, production-ready applications. 26 | 27 | The odmlib package is still in development. Although odmlib supports all of ODM, more work remains 28 | to complete all features for processing ClinicalData. The initial focus has been on getting 29 | the metadata sections complete. 
class Study(define_object.DefineObject):
    """ create a Define-XML v2.0 Study element object and initialize the MetaDataVersion object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang):
        """
        parse each row in the Excel sheet and create ODMLIB objects to return in the objects dictionary
        :param sheet: Excel worksheet object providing iter_rows(); column 1 holds the attribute name
            and column 2 holds its value
        :param objects: dictionary of ODMLIB objects; "Study" and "MetaDataVersion" are set by this method
        :param lang: xml:lang setting used when the worksheet has no Language value
        """
        self.sheet = sheet
        rows = {}
        for row in sheet.iter_rows(min_row=1, min_col=1, max_col=2, values_only=True):
            rows.update(self._load_row(row))
        # the worksheet's Language row takes precedence; fall back to the caller's lang instead of
        # raising KeyError when the row is absent or empty
        self.lang = rows.get("Language") or lang
        objects["Study"] = self._create_study_object(rows)
        objects["MetaDataVersion"] = self._create_metadataversion_object(rows)

    def _create_study_object(self, rows):
        """
        create the study ODMLIB object from the Study worksheet and return it
        :param rows: dictionary created from the rows in the study worksheet
        :return: odmlib Study object
        """
        study_oid = self.generate_oid(['ODM', rows["StudyName"]])
        study = DEFINE.Study(OID=study_oid)
        gv = DEFINE.GlobalVariables()
        gv.StudyName = DEFINE.StudyName(_content=rows["StudyName"])
        gv.StudyDescription = DEFINE.StudyDescription(_content=rows["StudyDescription"])
        gv.ProtocolName = DEFINE.ProtocolName(_content=rows["ProtocolName"])
        study.GlobalVariables = gv
        return study

    def _create_metadataversion_object(self, rows):
        """
        create the MetaDataVersion ODMLIB object from the Study worksheet and return it
        :param rows: dictionary created from the rows in the study worksheet
        :return: odmlib MetaDataVersion object
        """
        study_name = rows["StudyName"]
        mdv_oid = self.generate_oid(["MDV", study_name])
        # f-strings format a numeric study name cell where "+" concatenation would raise TypeError
        mdv = DEFINE.MetaDataVersion(OID=mdv_oid, Name=f"MDV {study_name}",
                                     Description=f"Data Definitions for {study_name}",
                                     DefineVersion="2.0.0", StandardName=rows["StandardName"],
                                     StandardVersion=rows["StandardVersion"])
        return mdv

    def _load_row(self, row_values):
        """
        load the Study worksheet row and return a dictionary
        :param row_values: the cell values of one worksheet row; the first is the attribute name
            and the second is its value
        :return: dictionary with the row attribute as key and value as dictionary value
        """
        return {row_values[0]: row_values[1]}
self._create_itemref_object(row_content) 27 | self._create_itemdef_object(row_content, objects) 28 | 29 | def _create_valuelistdef_object(self, row, objects): 30 | """ 31 | use the values from the ValueLevel worksheet row to create a ValueListDef odmlib object 32 | :param row: ValueList worksheet row values as a dictionary 33 | :param objects: dictionary of odmlib objects updated by this method 34 | """ 35 | item_oid = self.generate_oid(["IT", row["Dataset"], row["Variable"]]) 36 | if item_oid != self.lookup_oid: 37 | self.lookup_oid = item_oid 38 | oid = self.generate_oid(["VL", row["Dataset"], row["Variable"]]) 39 | self.vld = DEFINE.ValueListDef(OID=oid) 40 | objects["ValueListDef"].append(self.vld) 41 | 42 | def _create_itemref_object(self, row): 43 | """ 44 | use the values from the ValueLevel worksheet row to create ItemRef objects for ValueListDef 45 | :param row: ValueList worksheet row values as a dictionary 46 | """ 47 | oid = self.generate_oid(["IT", row["Where Clause"][3:]]) 48 | attr = {"ItemOID": oid, "Mandatory": row["Mandatory"], "OrderNumber": int(row["Order"])} 49 | if row.get("Method"): 50 | attr["MethodOID"] = self.generate_oid(["MT", row["Method"]]) 51 | item = DEFINE.ItemRef(**attr) 52 | wc = DEFINE.WhereClauseRef(WhereClauseOID=row["Where Clause"]) 53 | item.WhereClauseRef.append(wc) 54 | self.vld.ItemRef.append(item) 55 | 56 | def _create_itemdef_object(self, row, objects): 57 | """ 58 | use the values from the ValueLevel worksheet row to create ItemDef objects referenced by ValueListDef ItemRefs 59 | :param row: ValueList worksheet row values as a dictionary 60 | :param objects: dictionary of odmlib objects updated by this method 61 | """ 62 | oid = self.generate_oid(["IT", row["Where Clause"][3:]]) 63 | attr = {"OID": oid, "Name": row["Variable"], "DataType": row["Data Type"], "SASFieldName": row["Variable"]} 64 | self._add_optional_itemdef_attributes(attr, row) 65 | item = DEFINE.ItemDef(**attr) 66 | 
class Variables(define_object.DefineObject):
    """ create a Define-XML v2.0 ItemDef element object """
    def __init__(self):
        super().__init__()
        self.lookup_oid = None
        self.igd = None

    def create_define_objects(self, sheet, objects, lang):
        """
        read every data row of the Variables worksheet, creating an ItemDef and an ItemRef per row,
        then add a leaf object to each ItemGroupDef
        :param sheet: Excel worksheet object providing max_column and iter_rows()
        :param objects: dictionary of odmlib objects; "ItemDef" is replaced and the existing
            "ItemGroupDef" objects are updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        num_cols = self.sheet.max_column
        header = self.load_header(num_cols)
        objects["ItemDef"] = []
        # row 1 holds the column headers, so the data starts on row 2
        for values in sheet.iter_rows(min_row=2, min_col=1, max_col=num_cols, values_only=True):
            content = self.load_row(values, header)
            item_def = self._create_itemdef_object(content)
            self._create_itemref_object(content, objects)
            objects["ItemDef"].append(item_def)
        self._create_leaf_objects(objects)

    def _create_itemdef_object(self, row):
        """
        build one ItemDef from a Variables worksheet row
        :param row: Variables worksheet row values as a dictionary
        :return: odmlib ItemDef object
        """
        variable = row["Variable"]
        attr = {
            "OID": self.generate_oid(["IT", row["Dataset"], variable]),
            "Name": variable,
            "DataType": row["Data Type"],
            "SASFieldName": variable,
        }
        self._add_optional_itemdef_attributes(attr, row)
        item_def = DEFINE.ItemDef(**attr)
        label = DEFINE.TranslatedText(_content=row["Label"], lang=self.lang)
        item_def.Description = DEFINE.Description()
        item_def.Description.TranslatedText.append(label)
        self._add_optional_itemdef_elements(item_def, row)
        return item_def

    def _add_optional_itemdef_elements(self, item, row):
        """
        add the optional child ELEMENTS (CodeListRef, Origin content, ValueListRef) to the ItemDef
        :param item: ItemDef odmlib object updated with optional ELEMENTS
        :param row: Variables worksheet row values as a dictionary
        """
        codelist_oid = row.get("CodeList")
        if codelist_oid:
            item.CodeListRef = DEFINE.CodeListRef(CodeListOID=codelist_oid)
        # the Origin element is always created; its children depend on the row content
        item.Origin = DEFINE.Origin(Type=row["Origin"])
        pages = row.get("Pages")
        if pages:
            crf_ref = DEFINE.DocumentRef(leafID="LF.blankcrf")
            crf_ref.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=pages, Type="PhysicalRef"))
            item.Origin.DocumentRef.append(crf_ref)
        predecessor = row.get("Predecessor")
        if predecessor:
            item.Origin.Description = DEFINE.Description()
            item.Origin.Description.TranslatedText.append(DEFINE.TranslatedText(_content=predecessor))
        valuelist_oid = row.get("Valuelist")
        if valuelist_oid:
            item.ValueListRef = DEFINE.ValueListRef(ValueListOID=valuelist_oid)

    def _add_optional_itemdef_attributes(self, attr, row):
        """
        copy the populated optional worksheet columns into the ItemDef attribute dictionary
        :param attr: ItemDef attribute dictionary updated with optional attributes
        :param row: Variables worksheet row values as a dictionary
        """
        # (worksheet column, ItemDef attribute) pairs, in the order they are added
        optional_columns = (
            ("Length", "Length"),
            ("Significant Digits", "SignificantDigits"),
            ("Format", "DisplayFormat"),
            ("Comment", "CommentOID"),
        )
        for column, attribute in optional_columns:
            if row.get(column):
                attr[attribute] = row[column]

    def _create_itemref_object(self, row, objects):
        """
        create the ItemRef for a Variables worksheet row and append it to the row's ItemGroupDef
        :param row: Variables worksheet row values as a dictionary
        :param objects: dictionary of odmlib objects updated by this method
        :raises ValueError: when no ItemGroupDef exists for the row's Dataset
        """
        dataset_oid = self.generate_oid(["IG", row["Dataset"]])
        # only search for the ItemGroupDef again when the dataset changes from the previous row
        if self.lookup_oid != dataset_oid:
            self.lookup_oid = dataset_oid
            self.igd = self.find_object(objects["ItemGroupDef"], self.lookup_oid)
        if self.igd is None:
            raise ValueError(f"ItemGroupDef with OID {dataset_oid} is missing from the Datasets tab")
        attr = {
            "ItemOID": self.generate_oid(["IT", row["Dataset"], row["Variable"]]),
            "Mandatory": row["Mandatory"],
        }
        self._add_optional_itemref_attributes(attr, row)
        self.igd.ItemRef.append(DEFINE.ItemRef(**attr))

    def _add_optional_itemref_attributes(self, attr, row):
        """
        copy the populated optional worksheet columns into the ItemRef attribute dictionary
        :param attr: ItemRef object attributes to update with optional values
        :param row: Variables worksheet row values as a dictionary
        """
        method = row.get("Method")
        if method:
            attr["MethodOID"] = method
        # Order and KeySequence are stored as integers
        for column, attribute in (("Order", "OrderNumber"), ("KeySequence", "KeySequence")):
            if row.get(column):
                attr[attribute] = int(row[column])
        role = row.get("Role")
        if role:
            attr["Role"] = role

    def _create_leaf_objects(self, objects):
        """
        give every ItemGroupDef in objects a leaf whose href and title are the lower-cased
        "<Name>.xpt" file name
        :param objects: dictionary of odmlib objects updated by this method
        """
        for item_group in objects["ItemGroupDef"]:
            leaf_id = self.generate_oid(["LF", item_group.Name])
            file_name = (item_group.Name + ".xpt").lower()
            leaf = DEFINE.leaf(ID=leaf_id, href=file_name)
            leaf.title = DEFINE.title(_content=file_name)
            item_group.leaf = leaf
Excel sheet and create a odmlib objects to return in the objects dictionary 13 | :param sheet: xlrd Excel sheet object 14 | :param objects: dictionary of odmlib objects updated by this method 15 | :param lang: xml:lang setting for TranslatedText 16 | """ 17 | self.lang = lang 18 | self.sheet = sheet 19 | header = self.load_header(self.sheet.max_column) 20 | objects["WhereClauseDef"] = [] 21 | prev_oid = "" 22 | for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True): 23 | row_content = self.load_row(row, header) 24 | # if the current id is the same as the previous, we're just adding another range_check 25 | oid = row_content["OID"] 26 | if oid != prev_oid: 27 | wcd = self._create_whereclausedef_object(row_content) 28 | objects["WhereClauseDef"].append(wcd) 29 | prev_oid = oid 30 | else: 31 | rc = self._create_rangecheck(row_content) 32 | objects["WhereClauseDef"][-1].RangeCheck.append(rc) 33 | 34 | def _create_whereclausedef_object(self, row): 35 | """ 36 | use the values from the WhereClauses worksheet row to create a WhereClauseDef odmlib object 37 | :param row: WhereClauses worksheet row values as a dictionary 38 | :return: a WhereClause odmlib object 39 | """ 40 | attr = {"OID": row["OID"]} 41 | if row.get("Comment"): 42 | attr["CommentOID"] = self.generate_oid(["COM", row["Comment"]]) 43 | wc = DEFINE.WhereClauseDef(**attr) 44 | item_oid = self.generate_oid(["IT", row["Dataset"], row["Variable"]]) 45 | rc_attr = {"SoftHard": "Soft", "ItemOID": item_oid, "Comparator": row["Comparator"]} 46 | rc = DEFINE.RangeCheck(**rc_attr) 47 | for value in row["Value"].split(", "): 48 | cv = DEFINE.CheckValue(_content=value) 49 | rc.CheckValue.append(cv) 50 | wc.RangeCheck.append(rc) 51 | return wc 52 | 53 | def _create_rangecheck(self, row): 54 | """ 55 | use the values from the WhereClauses worksheet to create a RangeCheck odmlinb object 56 | :param row: WhereClauses worksheet row values as a dictionary 57 | :return: a RangeCheck 
class DefineObject(ABC):
    """ base class providing the worksheet-reading and OID helpers shared by the worksheet loaders """
    def __init__(self):
        self.sheet = None
        self.lang = "en"

    def load_row(self, row_values, header):
        """
        pair each header name with the cell value in the same position
        :param row_values: cell values of one worksheet row
        :param header: column names, in worksheet column order
        :return: dictionary with the column name as key and the cell value as value
        """
        return dict(zip(header, row_values))

    def load_header(self, num_cols):
        """
        read the first row of self.sheet as the list of column names
        :param num_cols: number of columns to read
        :return: list of the first row's cell values, or an empty list when the sheet yields no row
        """
        rows = self.sheet.iter_rows(min_row=1, max_row=1, min_col=1, max_col=num_cols, values_only=True)
        return list(next(iter(rows), ()))

    def generate_oid(self, descriptors):
        """
        join the descriptors with "." and upper-case the result to form an OID
        :param descriptors: list whose first entry is the element type prefix (e.g. "IT") followed
            by the values that identify the element; non-string values are converted to text
        :return: the upper-cased OID string
        :raises ValueError: when a descriptor is None (e.g. an empty worksheet cell)
        """
        if any(descriptor is None for descriptor in descriptors):
            raise ValueError(f"unable to generate an OID from {descriptors!r}: a value is missing")
        # worksheet cells can hold numbers, so convert before using the string operations
        parts = [str(descriptor) for descriptor in descriptors]
        # ensure the element type prefix is not already pre-pended to the OID
        if len(parts) > 1 and parts[1].startswith(parts[0] + "."):
            parts = parts[1:]
        return ".".join(parts).upper()

    def find_object(self, objects, oid):
        """
        find the first object whose OID attribute equals oid
        :param objects: iterable of objects that have an OID attribute
        :param oid: OID value to look for
        :return: the matching object, or None when there is no match
        """
        return next((o for o in objects if oid == o.OID), None)
class CodeLists(define_object.DefineObject):
    """ create a Define-XML v2.1 CodeList element object """
    def __init__(self):
        super().__init__()
        self.igd = None

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and create odmlib CodeList objects to return in the objects dictionary
        :param sheet: Excel sheet object
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this worksheet; part of the common create_define_objects signature
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["CodeList"] = []
        cl_c_code = ""
        cl_name = ""
        is_decode_item = False
        cl = None
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            # assumes when this is a new code list the names will not be the same
            if row_content["Name"] != cl_name:
                if cl_name:
                    self._add_previous_codelist_to_objects(cl_c_code, cl, objects)
                cl = self._create_codelist_object(row_content)
                cl_c_code = row_content.get("NCI Codelist Code")
                cl_name = row_content.get("Name")
                # assumption: if the first term has a decode element then create the list with decodes
                is_decode_item = bool(row_content["Decoded Value"])
            if is_decode_item:
                cl_item = self._create_codelistitem_object(row_content)
                cl.CodeListItem.append(cl_item)
            else:
                en_item = self._create_enumerateditem_object(row_content)
                cl.EnumeratedItem.append(en_item)
        # the last code list in the worksheet has no following row to trigger its addition
        self._add_previous_codelist_to_objects(cl_c_code, cl, objects)

    def _add_previous_codelist_to_objects(self, cl_c_code, cl, objects):
        """
        finish creating a codelist by adding an Alias if a c-code exists and adding the object to the list of codelists
        :param cl_c_code: codelist c-code
        :param cl: odmlib codelist object
        :param objects: dictionary of odmlib objects created from the Excel input file and updated in this method
        """
        if cl_c_code:
            alias = DEFINE.Alias(Context="nci:ExtCodeID", Name=cl_c_code)
            cl.Alias.append(alias)
        # add the code list to the list of code list objects
        if cl:
            objects["CodeList"].append(cl)

    def _create_codelist_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib CodeList object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: CodeList odmlib object
        """
        attr = {"OID": row["OID"], "Name": row["Name"], "DataType": row["Data Type"]}
        if row.get("Comment"):
            attr["CommentOID"] = row["Comment"]
        if row.get("IsNonStandard"):
            attr["IsNonStandard"] = row["IsNonStandard"]
        if row.get("StandardOID"):
            attr["StandardOID"] = row["StandardOID"]
        cl = DEFINE.CodeList(**attr)
        return cl

    def _create_enumerateditem_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib EnumeratedItem object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: EnumeratedItem odmlib object
        """
        attr = {"CodedValue": row["Term"]}
        if row.get("Order"):
            attr["OrderNumber"] = row["Order"]
        en_item = DEFINE.EnumeratedItem(**attr)
        if row.get("NCI Term Code"):
            alias = DEFINE.Alias(Context="nci:ExtCodeID", Name=row["NCI Term Code"])
            en_item.Alias.append(alias)
        return en_item

    def _create_codelistitem_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib CodeListItem object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: CodeListItem odmlib object
        """
        attr = {"CodedValue": row["Term"]}
        if row.get("Order"):
            attr["OrderNumber"] = row["Order"]
        cl_item = DEFINE.CodeListItem(**attr)
        decode = DEFINE.Decode()
        # if no decode for this term then use the submission value
        decode_text = row["Decoded Value"] if row["Decoded Value"] else row["Term"]
        # honor the xml:lang passed to create_define_objects instead of a hard-coded "en"
        tt = DEFINE.TranslatedText(_content=decode_text, lang=self.lang)
        decode.TranslatedText.append(tt)
        cl_item.Decode = decode
        if row.get("NCI Term Code"):
            alias = DEFINE.Alias(Context="nci:ExtCodeID", Name=row["NCI Term Code"])
            cl_item.Alias.append(alias)
        return cl_item
class Datasets(define_object.DefineObject):
    """ build Define-XML ItemGroupDef odmlib objects from the Datasets worksheet """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        read every data row of the Datasets worksheet and store one ItemGroupDef per row in objects
        :param sheet: Excel sheet object
        :param objects: dictionary of odmlib objects; the "ItemGroupDef" entry is replaced by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this worksheet
        """
        self.lang = lang
        self.sheet = sheet
        column_count = self.sheet.max_column
        header = self.load_header(column_count)
        item_groups = []
        objects["ItemGroupDef"] = item_groups
        data_rows = sheet.iter_rows(min_row=2, min_col=1, max_col=column_count, values_only=True)
        for values in data_rows:
            item_groups.append(self._create_itemgroupdef_object(self.load_row(values, header)))

    def _create_itemgroupdef_object(self, row):
        """
        turn one Datasets worksheet row into an ItemGroupDef odmlib object
        NOTE: only a single Class per dataset is read from the worksheet; nested sub-classes are not implemented
        :param row: Datasets worksheet row values as a dictionary
        :return: odmlib ItemGroupDef object
        """
        oid = self.generate_oid(["IG", row["Dataset"]])
        attr = {
            "OID": oid,
            "Name": row["Dataset"],
            "Repeating": row["Repeating"],
            "Domain": row["Dataset"],
            "SASDatasetName": row["Dataset"],
            "IsReferenceData": row["Reference Data"],
            "Purpose": row["Purpose"],
            "Structure": row["Structure"],
            "ArchiveLocationID": ".".join(["LF", row["Dataset"]]),
        }
        # optional worksheet columns and the ItemGroupDef attribute each one populates
        optional_columns = (
            ("Comment", "CommentOID"),
            ("IsNonStandard", "IsNonStandard"),
            ("StandardOID", "StandardOID"),
            ("HasNoData", "HasNoData"),
        )
        for column, attribute in optional_columns:
            if row.get(column):
                attr[attribute] = row[column]
        igd = DEFINE.ItemGroupDef(**attr)
        translated = DEFINE.TranslatedText(_content=row["Description"], lang=self.lang)
        igd.Description = DEFINE.Description()
        igd.Description.TranslatedText.append(translated)
        # spreadsheet has up to 1 Class per dataset, but spec allows for nested sub-classes
        if row.get("Class"):
            igd.Class = DEFINE.Class(Name=row["Class"])
        return igd
class Dictionaries(define_object.DefineObject):
    """ create a Define-XML v2.1 CodeList element object that references an external dictionary """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and add one CodeList with an ExternalCodeList per row to the objects dictionary
        :param sheet: Excel sheet object
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this worksheet; part of the common create_define_objects signature
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        # dictionaries are appended to the code lists already loaded; do not fail with a KeyError
        # when this worksheet happens to be processed before the Codelists worksheet
        codelists = objects.setdefault("CodeList", [])
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            cl = self._create_codelist_object(row_content)
            codelists.append(cl)

    def _create_codelist_object(self, row):
        """
        using the row from the Dictionaries worksheet create an odmlib CodeList object and add ExternalCodeList
        :param row: dictionary with contents from a row in the Dictionaries worksheet
        :return: CodeList odmlib object with ExternalCodeList
        """
        cl = DEFINE.CodeList(OID=row["OID"], Name=row["Name"], DataType=row["Data Type"])
        attr = {"Dictionary": row["Dictionary"]}
        if row.get("Version"):
            attr["Version"] = row["Version"]
        exd = DEFINE.ExternalCodeList(**attr)
        cl.ExternalCodeList = exd
        return cl
class Methods(define_object.DefineObject):
    """ create a Define-XML v2.1 MethodDef element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and create odmlib MethodDef objects to return in the objects dictionary
        :param sheet: Excel sheet object
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this worksheet; part of the common create_define_objects signature
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["MethodDef"] = []
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            item = self._create_methoddef_object(row_content)
            objects["MethodDef"].append(item)

    def _create_methoddef_object(self, row):
        """
        use the values from the Methods worksheet row to create a MethodDef odmlib object
        :param row: Methods worksheet row values as a dictionary
        :return: a MethodDef odmlib object
        """
        attr = {"OID": row["OID"], "Name": row["Name"], "Type": row["Type"]}
        method = DEFINE.MethodDef(**attr)
        tt = DEFINE.TranslatedText(_content=row["Description"], lang=self.lang)
        method.Description = DEFINE.Description()
        method.Description.TranslatedText.append(tt)
        if row.get("Expression Context"):
            method.FormalExpression.append(
                DEFINE.FormalExpression(Context=row["Expression Context"], _content=row["Expression Code"]))
        if row.get("Document"):
            self._add_document(row, method)
        return method

    def _add_document(self, row, method):
        """
        creates a DocumentRef object using a row from the Methods Worksheet
        :param row: Methods worksheet row values as a dictionary
        :param method: odmlib MethodDef object that gets updated with a DocumentRef object
        """
        dr = DEFINE.DocumentRef(leafID=row["Document"])
        # a document may be referenced as a whole; only add a PDFPageRef when pages are given
        if row.get("Pages"):
            pdf = DEFINE.PDFPageRef(PageRefs=row["Pages"], Type="NamedDestination")
            dr.PDFPageRef.append(pdf)
        method.DocumentRef.append(dr)
example application that generates a Define-XML v2.1 file from 5 | an Excel spreadsheet that contains the study metadata needed to create the Define-XML file. The Excel 6 | spreadsheet version of the metadata makes it easier for many to edit or create new content to include in a 7 | Define-XML v2.1 file. The companion define2-1-to-xlsx program takes the generated Define-XML file and creates 8 | a spreadsheet using the metadata. This example demonstrates some basic odmlib features. 9 | 10 | ## Getting Started 11 | To run xls2define.py from the command-line: 12 | 13 | `python xls2define.py -e ./data/odmlib-define-metadata.xlsx -d ./data/odmlib-roundtrip-define.xml` 14 | 15 | Or, to run it with both XML schema validation (-v) and some basic conformance checking (-c): 16 | 17 | `-v -c -e ./data/odmlib-define-metadata.xlsx -d ./data/odmlib-roundtrip-define.xml 18 | -s "/home/sam/standards/DefineV211/schema/cdisc-define-2.1/define2-1-0.xsd"` 19 | 20 | The odmlib package must be installed to run xlsx2define2-1. See the 21 | [odmlib repository](https://github.com/swhume/odmlib) to install the odmlib source code and latest features. 22 | The odmlib package can also be installed from PyPI with the understanding that it is still in development 23 | so might not have everything available in the odmlib repository. It can be installed from PyPI using: 24 | 25 | `pip install odmlib` 26 | 27 | The odmlib README provides instructions for getting started. 28 | 29 | ## Limitations 30 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib. 31 | The examples are not complete, production-ready applications. However, I'm happy to update these applications 32 | to accommodate new features or bug fixes and will also review pull requests. 33 | 34 | The odmlib package is still in development.
class Standards(define_object.DefineObject):
    """ create a Define-XML v2.1 Standards element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse each row in the Standards Excel sheet and create odmlib objects to return in the objects dictionary
        :param sheet: Excel sheet object
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this worksheet; part of the common create_define_objects signature
        """
        standards = DEFINE.Standards()
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        # keep the "ItemGroupDef" key available for later processing, but never discard datasets
        # that were already loaded if the Datasets worksheet was processed before this one
        objects.setdefault("ItemGroupDef", [])
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            # skip rows with an empty first column
            if row[0]:
                row_content = self.load_row(row, header)
                std = self._create_standard_object(row_content)
                standards.Standard.append(std)
        objects["Standards"] = standards

    def _create_standard_object(self, row):
        """
        use the values from the Standards worksheet row to create a Standard odmlib object
        :param row: Standards worksheet row values as a dictionary
        :return: odmlib Standard object
        """
        # Version is converted with str() because the worksheet cell may hold a number
        attr = {"OID": row["OID"], "Name": row["Name"], "Type": row["Type"], "Version": str(row["Version"]),
                "Status": row["Status"]}
        if row.get("Publishing Set"):
            attr["PublishingSet"] = row["Publishing Set"]
        if row.get("Comment"):
            attr["CommentOID"] = row["Comment"]
        return DEFINE.Standard(**attr)
-------------------------------------------------------------------------------- 1 | from odmlib.define_2_1 import model as DEFINE 2 | import define_object 3 | 4 | 5 | class Study(define_object.DefineObject): 6 | """ create a Define-XML v2.1 Study element object and initialize the MetaDataVersion object """ 7 | def __init__(self): 8 | super().__init__() 9 | 10 | def create_define_objects(self, sheet, objects, lang, acrf): 11 | """ 12 | parse each row in the Excel sheet and create ODMLIB objects to return in the objects dictionary 13 | :param sheet: xlrd Excel sheet object 14 | :param objects: dictionary of ODMLIB objects updated by this method 15 | :param lang: xml:lang setting for TranslatedText 16 | """ 17 | self.lang = lang 18 | self.acrf = acrf 19 | self.sheet = sheet 20 | rows = {} 21 | for row in sheet.iter_rows(min_row=1, min_col=1, max_col=2, values_only=True): 22 | row_content = self._load_row(row) 23 | rows.update(row_content) 24 | self.lang = rows["Language"] 25 | self.acrf = rows["Annotated CRF"] 26 | objects["Study"] = self._create_study_object(rows) 27 | objects["MetaDataVersion"] = self._create_metadataversion_object(rows) 28 | 29 | def _create_study_object(self, rows): 30 | """ 31 | create the study ODMLIB object from the Study worksheet and return it 32 | :param rows: dictionary created from the rows in the study worksheet 33 | :return: odmlib Study object 34 | """ 35 | study_oid = self.generate_oid(['ODM', rows["StudyName"]]) 36 | study = DEFINE.Study(OID=study_oid) 37 | gv = DEFINE.GlobalVariables() 38 | gv.StudyName = DEFINE.StudyName(_content=rows["StudyName"]) 39 | gv.StudyDescription = DEFINE.StudyDescription(_content=rows["StudyDescription"]) 40 | gv.ProtocolName = DEFINE.ProtocolName(_content=rows["ProtocolName"]) 41 | study.GlobalVariables = gv 42 | return study 43 | 44 | def _create_metadataversion_object(self, rows): 45 | """ 46 | create the MetaDataVersion ODMLIB object from the Study worksheet and return it 47 | :param rows: dictionary 
class ValueLevel(define_object.DefineObject):
    """ create Define-XML v2.1 ValueListDef element objects and the ItemDefs they reference """
    def __init__(self):
        super().__init__()
        self.lookup_oid = None
        self.vld = None

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and create odmlib objects to return in the objects dictionary
        :param sheet: Excel sheet object
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: leaf ID of the annotated CRF, used for Origin page references
        """
        self.lang = lang
        self.acrf = acrf
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["ValueListDef"] = []
        vl_oid = ""
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            # a change in OID starts a new ValueListDef; following rows add ItemRefs to it
            if vl_oid != row_content["OID"]:
                self._create_valuelistdef_object(row_content, objects)
                vl_oid = row_content["OID"]
            self._create_itemref_object(row_content)
            self._create_itemdef_object(row_content, objects)

    def _create_valuelistdef_object(self, row, objects):
        """
        use the values from the ValueLevel worksheet row to create a ValueListDef odmlib object
        :param row: ValueList worksheet row values as a dictionary
        :param objects: dictionary of odmlib objects updated by this method
        """
        self.vld = DEFINE.ValueListDef(OID=row["OID"])
        objects["ValueListDef"].append(self.vld)

    def _create_itemref_object(self, row):
        """
        use the values from the ValueLevel worksheet row to create ItemRef objects for ValueListDef
        :param row: ValueList worksheet row values as a dictionary
        """
        attr = {"ItemOID": row["ItemOID"], "Mandatory": row["Mandatory"], "OrderNumber": int(row["Order"])}
        if row.get("Method"):
            attr["MethodOID"] = self.generate_oid(["MT", row["Method"]])
        item = DEFINE.ItemRef(**attr)
        wc = DEFINE.WhereClauseRef(WhereClauseOID=row["Where Clause"])
        item.WhereClauseRef.append(wc)
        self.vld.ItemRef.append(item)

    def _create_itemdef_object(self, row, objects):
        """
        use the values from the ValueLevel worksheet row to create ItemDef objects referenced by ValueListDef ItemRefs
        :param row: ValueList worksheet row values as a dictionary
        :param objects: dictionary of odmlib objects updated by this method
        """
        attr = {"OID": row["ItemOID"], "Name": row["Variable"], "DataType": row["Data Type"]}
        self._add_optional_itemdef_attributes(attr, row)
        item = DEFINE.ItemDef(**attr)
        self._add_optional_itemdef_elements(item, row)
        # value level ItemDefs join the variable ItemDefs; do not fail with a KeyError when the
        # list has not been created yet by the Variables worksheet
        objects.setdefault("ItemDef", []).append(item)

    def _add_optional_itemdef_elements(self, item, row):
        """
        use the values from the ValueList worksheet row to add the optional ELEMENTS to the ItemDef object
        :param item: ItemDef odmlib object updated with optional ELEMENTS
        :param row: ValueList worksheet row values as a dictionary
        """
        if row.get("Codelist"):
            cl = DEFINE.CodeListRef(CodeListOID=row.get("Codelist"))
            item.CodeListRef = cl
        origin = None
        if row.get("Origin Type"):
            # spreadsheet input only provides for 1 Origin, but multiple are supported by the spec
            attr = {"Type": row["Origin Type"]}
            if row.get("Origin Source"):
                attr["Source"] = row["Origin Source"]
            origin = DEFINE.Origin(**attr)
            item.Origin.append(origin)
        # Predecessor and Pages are children of Origin, so they can only be added when an
        # Origin was created above; previously this indexed item.Origin[0] unconditionally
        if origin is not None:
            if row.get("Predecessor"):
                origin.Description = DEFINE.Description()
                origin.Description.TranslatedText.append(DEFINE.TranslatedText(_content=row["Predecessor"]))
            if row.get("Pages"):
                dr = DEFINE.DocumentRef(leafID=self.acrf)
                dr.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=row["Pages"], Type="PhysicalRef"))
                origin.DocumentRef.append(dr)

    def _add_optional_itemdef_attributes(self, attr, row):
        """
        use the values from the ValueList worksheet row to add the optional attributes to the ItemDef attributes
        :param attr: dictionary of ItemDef attributes updated in place with the optional attributes
        :param row: ValueList worksheet row values as a dictionary
        """
        if len(row["Variable"]) < 9:
            attr["SASFieldName"] = row["Variable"]
        else:
            print(f"Skipping SASFieldName for ItemDef {row['Variable']} because it exceeds the 8 character limit")
        if row.get("Length"):
            attr["Length"] = row["Length"]
        if row.get("Significant Digits"):
            attr["SignificantDigits"] = row["Significant Digits"]
        if row.get("Format"):
            attr["DisplayFormat"] = row["Format"]
        if row.get("Comment"):
            attr["CommentOID"] = self.generate_oid(["COM", row["Comment"]])
class WhereClauses(define_object.DefineObject):
    """ create a Define-XML v2.1 WhereClauseDef element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and create odmlib WhereClauseDef objects to return in the objects dictionary
        :param sheet: Excel sheet object
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this worksheet; part of the common create_define_objects signature
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["WhereClauseDef"] = []
        prev_oid = ""
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            # if the current id is the same as the previous, we're just adding another range_check
            oid = row_content["OID"]
            if oid != prev_oid:
                wcd = self._create_whereclausedef_object(row_content)
                objects["WhereClauseDef"].append(wcd)
                prev_oid = oid
            else:
                rc = self._create_rangecheck(row_content)
                objects["WhereClauseDef"][-1].RangeCheck.append(rc)

    def _create_whereclausedef_object(self, row):
        """
        use the values from the WhereClauses worksheet row to create a WhereClauseDef odmlib object
        :param row: WhereClauses worksheet row values as a dictionary
        :return: a WhereClauseDef odmlib object holding the row's RangeCheck
        """
        attr = {"OID": row["OID"]}
        if row.get("Comment"):
            attr["CommentOID"] = self.generate_oid(["COM", row["Comment"]])
        wc = DEFINE.WhereClauseDef(**attr)
        wc.RangeCheck.append(self._create_rangecheck(row))
        return wc

    def _create_rangecheck(self, row):
        """
        use the values from the WhereClauses worksheet to create a RangeCheck odmlib object
        :param row: WhereClauses worksheet row values as a dictionary
        :return: a RangeCheck odmlib object
        """
        item_oid = self.generate_oid(["IT", row["Dataset"], row["Variable"]])
        rc_attr = {"SoftHard": "Soft", "ItemOID": item_oid, "Comparator": row["Comparator"]}
        rc = DEFINE.RangeCheck(**rc_attr)
        for value in self._split_check_values(row["Value"]):
            rc.CheckValue.append(DEFINE.CheckValue(_content=value))
        return rc

    @staticmethod
    def _split_check_values(value):
        """
        split the Value cell into the list of individual check values
        :param value: content of the Value cell; may be empty or a number rather than a string
        :return: list of check value strings; a single empty string when the cell is empty
        """
        # an empty cell is read as None and still needs one (empty) CheckValue
        if value is None or value == "":
            return [""]
        # numeric cells are not strings, so convert before splitting the comma separated list
        return str(value).split(", ")
class SupportingDocuments:
    """ create the Define-XML v2.1 AnnotatedCRF and SupplementalDoc element objects """

    @staticmethod
    def create_annotatedcrf(annotated_crf):
        """
        create the AnnotatedCRF odmlib object that references the annotated CRF leaf
        :param annotated_crf: leaf ID of the annotated CRF document
        :return: AnnotatedCRF odmlib object
        """
        acrf = DEFINE.AnnotatedCRF()
        dr = DEFINE.DocumentRef(leafID=annotated_crf)
        acrf.DocumentRef = dr
        return acrf

    @staticmethod
    def create_supplementaldoc(annotated_crf, leaf_objects):
        """
        create the SupplementalDoc odmlib object referencing every leaf other than the annotated CRF
        :param annotated_crf: leaf ID of the annotated CRF document, excluded from the supplemental documents
        :param leaf_objects: leaf odmlib objects (each with an ID); may be empty or None
        :return: SupplementalDoc odmlib object, or None when there is no supplemental document to reference
        """
        # no leaf objects at all (including None): nothing to reference
        if not leaf_objects:
            return None
        supplemental_ids = [lo.ID for lo in leaf_objects if lo.ID != annotated_crf]
        # avoid returning an empty SupplementalDoc when the annotated CRF is the only leaf
        if not supplemental_ids:
            return None
        sdoc = DEFINE.SupplementalDoc()
        for leaf_id in supplemental_ids:
            sdoc.DocumentRef.append(DEFINE.DocumentRef(leafID=leaf_id))
        return sdoc