├── .gitignore
├── LICENSE.md
├── README.md
├── ct2json
├── ct2json.py
├── data
│ ├── sdtm-ct.json
│ └── sdtm-ct.xml
└── schema
│ ├── controlledterminology-extension.xsd
│ ├── controlledterminology-ns.xsd
│ ├── controlledterminology1-1-1.xsd
│ └── foundation
│ ├── ODM1-3-2-foundation.xsd
│ ├── ODM1-3-2.xsd
│ ├── xlink.xsd
│ ├── xml.xsd
│ └── xmldsig-core-schema.xsd
├── ct2odm
├── ct2odm.py
└── data
│ ├── sdtm-ct.txt
│ ├── sdtm-ct.xls
│ └── sdtm-ct.xml
├── define2-1-to-xlsx
├── README.md
├── codelists.py
├── comments.py
├── data
│ ├── define.xml
│ ├── defineV21-SDTM.xml
│ ├── odmlib-define-metadata.xlsx
│ └── odmlib-roundtrip-define.xml
├── datasets.py
├── define2-1-to-xlsx.py
├── dictionaries.py
├── documents.py
├── excel_define_file.py
├── methods.py
├── requirements.txt
├── standards.py
├── study.py
├── value_level.py
├── variables.py
└── where_clauses.py
├── define2xls
├── .idea
│ ├── .gitignore
│ ├── define2xls.iml
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── vcs.xml
├── README.md
├── codelists.py
├── comments.py
├── data
│ ├── codelists.csv
│ ├── comments.csv
│ ├── datasets.csv
│ ├── dictionaries.csv
│ ├── documents.csv
│ ├── methods.csv
│ ├── odmlib-define-metadata-clean.xlsx
│ ├── odmlib-define-metadata-save.xlsx
│ ├── odmlib-define-metadata-temp.xlsx
│ ├── odmlib-define-metadata.xlsx
│ ├── odmlib-roundtrip-define.xml
│ ├── odmlib-rt-test-define.xml
│ ├── sdtm-xls-define.xml
│ ├── study.csv
│ ├── valuelevel.csv
│ ├── variables.csv
│ └── whereclauses.csv
├── datasets.py
├── define2xls.py
├── dictionaries.py
├── documents.py
├── excel_define_file.py
├── methods.py
├── requirements.txt
├── study.py
├── value_level.py
├── variables.py
└── where_clauses.py
├── get_started
├── .idea
│ ├── .gitignore
│ ├── get_started.iml
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── vcs.xml
├── README.md
├── data
│ └── odm_demo.xml
├── get_started.py
└── requirements.txt
├── library_xml
├── .idea
│ ├── .gitignore
│ ├── .name
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── library_1_0.iml
│ ├── misc.xml
│ ├── modules.xml
│ └── vcs.xml
├── README.md
├── __init__.py
├── data
│ ├── library-cdash-2-2.xml
│ ├── library-odmlib-cdashig2-2.json
│ ├── library-odmlib.json
│ ├── library-sdtm-3-4.xml
│ └── odmlib.xml
├── library_define_1_0
│ ├── __init__.py
│ └── model.py
├── library_odm_1_0
│ ├── __init__.py
│ └── model.py
├── library_xml.py
├── requirements.txt
└── tests
│ └── test_local_library_loader.py
├── merge_odm
├── .idea
│ ├── .gitignore
│ ├── .name
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── merge_odm.iml
│ ├── misc.xml
│ ├── modules.xml
│ └── vcs.xml
├── README.md
├── data
│ ├── cdash-odm-source.xml
│ ├── cdash-odm-target-clean.xml
│ └── cdash-odm-target.xml
├── merge_odm.py
└── requirements.txt
├── notebooks
├── data
│ ├── cosa_define_demo.xml
│ └── cosa_demo.xml
├── first_define.ipynb
├── first_odm.ipynb
└── generate_define.ipynb
├── snippets
├── data
│ ├── cdash-odm-test.xml
│ ├── defineV21-SDTM.xml
│ └── simple_create.xml
├── odmlib_first_define.py
├── simple_create_odm.py
├── validate_define.py
├── validate_odm.py
└── validate_odm_metadata.py
├── xls2define
├── .idea
│ ├── .gitignore
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── vcs.xml
│ └── xls2define.iml
├── CodeLists.py
├── Comments.py
├── Datasets.py
├── Dictionaries.py
├── Documents.py
├── Methods.py
├── README.md
├── Study.py
├── ValueLevel.py
├── Variables.py
├── WhereClauses.py
├── data
│ ├── SDTM-Metadata-Worksheet.xlsx
│ ├── odmlib-define-metadata.xlsx
│ ├── odmlib-roundtrip-define.xml
│ └── odmlib-rt-test-define.xml
├── define_object.py
├── odm.py
├── requirements.txt
├── supporting_docs.py
└── xls2define.py
└── xlsx2define2-1
├── CodeLists.py
├── Comments.py
├── Datasets.py
├── Dictionaries.py
├── Documents.py
├── Methods.py
├── README.md
├── Standards.py
├── Study.py
├── ValueLevel.py
├── Variables.py
├── WhereClauses.py
├── data
├── odmlib-define-metadata.xlsx
└── odmlib-roundtrip-define.xml
├── define_object.py
├── odm.py
├── requirements.txt
├── supporting_docs.py
└── xlsx2define2-1.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | define2-1-to-xlsx/venv/
6 | xlsx2define2-1/venv/
7 | ct2json/venv/
8 | ct2odm/venv/
9 | define2xls/venv/
10 | get_started/venv/
11 | merge_odm/venv/
12 | snippets/venv/
13 | xls2define/venv/
14 | *.egg-info/
15 | docs/build/
16 | odmlib.egg-info/
17 | define2-1-to-xlsx/data/*.csv
18 | define2xls/data/*.csv
19 | define2-1-to-xlsx/.idea
20 | xlsx2define2-1/.idea
21 | xlsx2define2-1/data/t1d*.*
22 | xlsx2define2-1/data/T1-Dexi*.xlsx
23 | ct2json/.idea
24 | ct2odm/.idea
25 | define2xls/.idea
26 | get_started/.idea
27 | merge_odm/.idea
28 | snippets/.idea
29 | snippets/data/ODM*.xml
30 | snippets/data/t1d-define.xml
31 | notebooks/.idea
32 | notebooks/.ipynb_checkpoints
33 | .gitignore.swp
34 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | =====================
3 |
4 | Copyright © 2022 Sam Hume
5 |
6 | Permission is hereby granted, free of charge, to any person
7 | obtaining a copy of this software and associated documentation
8 | files (the “Software”), to deal in the Software without
9 | restriction, including without limitation the rights to use,
10 | copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the
12 | Software is furnished to do so, subject to the following
13 | conditions:
14 |
15 | The above copyright notice and this permission notice shall be
16 | included in all copies or substantial portions of the Software.
17 |
18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 | OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # odmlib examples
2 |
3 | ## Introduction
4 | The odmlib examples are small applications that demonstrate the use of the odmlib Python package for creating
5 | and processing ODM files, including extensions like Define-XML. The examples are intended to make it easier
6 | to get started using the odmlib package.
7 |
8 | The odmlib package simplifies working with the CDISC ODM data exchange standard and its extensions, such as
9 | Define-XML, in Python. The odmlib package provides an object-oriented interface to working with ODM documents
10 | that simplifies creating and processing them.
11 |
12 | ## Why odmlib?
13 | The odmlib package satisfies my personal interest in working with ODM using an object-oriented
14 | interface in Python.
15 |
16 | ## Getting Started
17 | See the [odmlib repository](https://github.com/swhume/odmlib) to get the odmlib package. Eventually, it may
18 | make its way into PyPi, but for now you'll need to install from the source. The odmlib README provides
19 | instructions for getting started.
20 |
21 | ## Note
22 | Effort will be made to update the odmlib_examples as odmlib and its associated models are updated. If
23 | an example doesn't run correctly, please update to the latest version of odmlib.
24 |
25 | ## Limitations
26 | The odmlib examples are simple programs intended to demonstrate some of the basic capabilities of odmlib.
27 | The examples are not complete, production ready applications.
28 |
29 | The odmlib package is still in development. Although it is being actively used on several projects, additional
30 | use and testing may trigger updates or bug fixes. Create an issue in GitHub if you need some help getting
31 | an example to run. Thanks for your patience.
--------------------------------------------------------------------------------
/ct2json/ct2json.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import odmlib.loader as LD
3 | import odmlib.odm_loader as OL
4 | import odmlib.odm_parser as P
5 | import xmlschema as XSD
6 | import os
7 |
# Default CT-XML schema path (relative to the current working directory); used as the default
# value of the -s/--schema command-line option.
CT_SCHEMA = "./schema/controlledterminology1-1-1.xsd"
9 |
10 | """
11 | ct2json.py - an example program using odmlib to read a CT-XML ODM file and convert it to JSON.
12 | Command-line examples:
13 | python ct2json.py -x ./data/sdtm-ct.xml -j ./data/sdtm-ct.json
14 | python ct2json.py -v -x ./data/sdtm-ct.xml -j ./data/sdtm-ct.json
15 | python ct2json.py -v -x ./data/sdtm-ct.xml -j ./data/sdtm-ct.json -s "/home/sam/src/ct2json/schema/controlledterminology1-1-1.xsd"
16 | """
17 |
18 |
class CT2Json:
    """ convert a CT-XML ODM file into a JSON file using odmlib """
    def __init__(self, ct_file, json_file, language="en"):
        """
        :param ct_file: str - path and file name of the CT-XML input file
        :param json_file: str - path and file name of the JSON file to write
        :param language: str - language code; stored on the instance as lang
        """
        self.lang = language
        self.json_file = json_file
        self.ct_file = ct_file

    def create(self):
        """ load the CT-XML document with the ct_1_1_1 model and write its root object as JSON """
        xml_loader = OL.XMLODMLoader(
            model_package="ct_1_1_1",
            ns_uri="http://ncicb.nci.nih.gov/xml/odm/EVS/CDISC"
        )
        odm_loader = LD.ODMLoader(xml_loader)
        odm_loader.open_odm_document(self.ct_file)
        odm_loader.root().write_json(self.json_file)
31 |
32 |
class CTValidator:
    """ CT-XML schema validation """
    def __init__(self, schema, ct_file):
        """
        :param schema: str - the path and filename for the CT-XML schema
        :param ct_file: str - the path and filename for the CT-XML file to validate
        """
        self.schema_file = schema
        self.ct_file = ct_file

    def validate(self):
        """
        execute the schema validation and report the results
        :raises ValueError: when the schema file or the CT-XML file cannot be found
        """
        # check the inputs first so a missing file is reported clearly rather than as a parser error
        self._check_file_existence()
        validator = P.ODMSchemaValidator(self.schema_file)
        try:
            validator.validate_file(self.ct_file)
        except XSD.validators.exceptions.XMLSchemaChildrenValidationError as ve:
            print(f"schema validation errors: {ve}")
        else:
            print("CT-XML schema validation completed successfully...")

    def _check_file_existence(self):
        """ throw an error if the schema or CT-XML file cannot be found """
        if not os.path.isfile(self.schema_file):
            raise ValueError("The schema validate flag is set, but the schema file cannot be found.")
        if not os.path.isfile(self.ct_file):
            raise ValueError("The CT-XML file cannot be found.")
58 |
59 |
def set_cmd_line_args():
    """
    define and parse the command-line options used to convert a CT-XML file into JSON
    :return: the argparse namespace with ct_file, json_file, schema_file, is_validate, and language
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-x", "--ct",
        dest="ct_file",
        required=True,
        help="path and file name of CT-XML input file",
    )
    cli.add_argument(
        "-j", "--json",
        dest="json_file",
        required=False,
        default="./",
        help="path and file to write the generated JSON file to",
    )
    cli.add_argument(
        "-s", "--schema",
        dest="schema_file",
        default=CT_SCHEMA,
        help="path and file name of CT-XML schema",
    )
    # -v may be given bare (stores True) or with an explicit value
    cli.add_argument(
        "-v", "--validate",
        dest="is_validate",
        nargs='?',
        const=True,
        default=False,
        help="schema validate the CT-XML file",
    )
    cli.add_argument(
        "-l", "--lang",
        dest="language",
        required=False,
        default="en",
        help="language code",
    )
    return cli.parse_args()
77 |
78 |
def main():
    """ main driver: optionally schema validate the CT-XML file, then convert it to JSON """
    cli_args = set_cmd_line_args()
    if cli_args.is_validate:
        CTValidator(cli_args.schema_file, cli_args.ct_file).validate()
    converter = CT2Json(cli_args.ct_file, cli_args.json_file, cli_args.language)
    converter.create()


if __name__ == "__main__":
    main()
91 |
--------------------------------------------------------------------------------
/ct2json/schema/controlledterminology-extension.xsd:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/ct2json/schema/controlledterminology-ns.xsd:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
9 |
10 |
11 | The version of the CT-XML standard.
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/ct2json/schema/controlledterminology1-1-1.xsd:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/ct2json/schema/foundation/ODM1-3-2.xsd:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/ct2json/schema/foundation/xlink.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 | Comment describing your root element
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/ct2json/schema/foundation/xml.xsd:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 | See http://www.w3.org/XML/1998/namespace.html and
9 | http://www.w3.org/TR/REC-xml for information about this namespace.
10 |
11 | This schema document describes the XML namespace, in a form
12 | suitable for import by other schema documents.
13 |
14 | Note that local names in this namespace are intended to be defined
15 | only by the World Wide Web Consortium or its subgroups. The
16 | following names are currently defined in this namespace and should
17 | not be used with conflicting semantics by any Working Group,
18 | specification, or document instance:
19 |
20 | base (as an attribute name): denotes an attribute whose value
21 | provides a URI to be used as the base for interpreting any
22 | relative URIs in the scope of the element on which it
23 | appears; its value is inherited. This name is reserved
24 | by virtue of its definition in the XML Base specification.
25 |
26 | lang (as an attribute name): denotes an attribute whose value
27 | is a language code for the natural language of the content of
28 | any element; its value is inherited. This name is reserved
29 | by virtue of its definition in the XML specification.
30 |
31 | space (as an attribute name): denotes an attribute whose
32 | value is a keyword indicating what whitespace processing
33 | discipline is intended for the content of the element; its
34 | value is inherited. This name is reserved by virtue of its
35 | definition in the XML specification.
36 |
37 | Father (in any context at all): denotes Jon Bosak, the chair of
38 | the original XML Working Group. This name is reserved by
39 | the following decision of the W3C XML Plenary and
40 | XML Coordination groups:
41 |
42 | In appreciation for his vision, leadership and dedication
43 | the W3C XML Plenary on this 10th day of February, 2000
44 | reserves for Jon Bosak in perpetuity the XML name
45 | xml:Father
46 |
47 |
48 |
49 |
50 | This schema defines attributes and an attribute group
51 | suitable for use by
52 | schemas wishing to allow xml:base, xml:lang or xml:space attributes
53 | on elements they define.
54 |
55 | To enable this, such a schema must import this schema
56 | for the XML namespace, e.g. as follows:
57 | <schema . . .>
58 | . . .
59 | <import namespace="http://www.w3.org/XML/1998/namespace"
60 | schemaLocation="http://www.w3.org/2001/03/xml.xsd"/>
61 |
62 | Subsequently, qualified reference to any of the attributes
63 | or the group defined below will have the desired effect, e.g.
64 |
65 | <type . . .>
66 | . . .
67 | <attributeGroup ref="xml:specialAttrs"/>
68 |
69 | will define a type which will schema-validate an instance
70 | element with any of those attributes
71 |
72 |
73 |
74 | In keeping with the XML Schema WG's standard versioning
75 | policy, this schema document will persist at
76 | http://www.w3.org/2001/03/xml.xsd.
77 | At the date of issue it can also be found at
78 | http://www.w3.org/2001/xml.xsd.
79 | The schema document at that URI may however change in the future,
80 | in order to remain compatible with the latest version of XML Schema
81 | itself. In other words, if the XML Schema namespace changes, the version
82 | of this document at
83 | http://www.w3.org/2001/xml.xsd will change
84 | accordingly; the version at
85 | http://www.w3.org/2001/03/xml.xsd will not change.
86 |
87 |
88 |
89 |
90 |
91 | In due course, we should install the relevant ISO 2- and 3-letter
92 | codes as the enumerated possible values . . .
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 | See http://www.w3.org/TR/xmlbase/ for
109 | information about this attribute.
110 |
111 |
112 |
113 |
114 |
121 |
122 |
--------------------------------------------------------------------------------
/ct2odm/ct2odm.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from odmlib.ct_1_1_1 import model as CT
3 | import datetime
4 |
5 |
class CT2ODM:
    """ generate a CT-XML ODM file from a tab-delimited controlled terminology text file """
    def __init__(self, csv_file, odm_file, standard, package_date):
        """
        :param csv_file: str - path and file name of the tab-delimited terminology input file
        :param odm_file: str - path and file name of the ODM XML file to write
        :param standard: str - standard name used in the generated OIDs and names (e.g. SDTM)
        :param package_date: str - terminology package date used in OIDs and AsOfDateTime (e.g. 2021-06-25)
        """
        self.csv_file = csv_file
        self.odm_file = odm_file
        self.standard = standard
        self.pkg_date = package_date

    def create(self):
        """
        read the terminology rows, add one CodeList per codelist row with its terms, and write the ODM XML file
        :raises ValueError: when a term row appears before any codelist row
        """
        odm = self._create_odm()
        odm.Study.append(self._create_study())
        odm.Study[0].MetaDataVersion.append(self._create_mdv())
        # newline="" lets the csv module handle line endings itself, as the csv documentation recommends
        with open(self.csv_file, "r", newline="") as csv_file:
            csv_reader = csv.DictReader(csv_file, delimiter='\t')
            line_count = 0
            cl_dict = {}
            cl_c_code = ""
            cl = None
            for row in csv_reader:
                # a codelist row has its own Code and an extensible flag, but no parent Codelist Code
                if row["Code"] and row["Codelist Extensible (Yes/No)"] and not row["Codelist Code"]:
                    if cl_dict and cl_c_code != row["Code"]:
                        self._complete_codelist(odm, cl, cl_dict)
                    # assumes Codelist comes before associated terms
                    cl, cl_dict = self._create_codelist(row)
                    cl_c_code = row["Code"]
                else:
                    if cl is None:
                        raise ValueError(
                            f"Term row {line_count + 1} in {self.csv_file} appears before any codelist row."
                        )
                    cl.EnumeratedItem.append(self._create_enumerated_item(row))
                line_count += 1
        # an input without any codelist rows leaves no pending codelist to complete
        if cl is not None:
            self._complete_codelist(odm, cl, cl_dict)
        print(f'Processed {line_count} lines.')
        odm.write_xml(self.odm_file)

    def _create_enumerated_item(self, row):
        """
        create an EnumeratedItem for a term row
        :param row: dictionary for one term row of the terminology file
        :return: odmlib EnumeratedItem object
        """
        ei = CT.EnumeratedItem(CodedValue=row["CDISC Submission Value"], ExtCodeID=row["Code"])
        if row["CDISC Synonym(s)"]:
            for synonym in self._get_synonyms(row["CDISC Synonym(s)"]):
                ei.CDISCSynonym.append(CT.CDISCSynonym(_content=synonym))
        ei.CDISCDefinition = CT.CDISCDefinition(_content=row["CDISC Definition"])
        ei.PreferredTerm = CT.PreferredTerm(_content=row["NCI Preferred Term"])
        return ei

    def _complete_codelist(self, odm, cl, cl_dict):
        """ add the codelist-level elements to cl and append it to the first MetaDataVersion """
        self._update_codelist(cl, cl_dict)
        odm.Study[0].MetaDataVersion[0].CodeList.append(cl)

    def _update_codelist(self, cl, cl_dict):
        """ set the submission value, synonym, and preferred term elements on cl from cl_dict """
        cl.CDISCSubmissionValue = CT.CDISCSubmissionValue(_content=cl_dict["sub_val"])
        cl.CDISCSynonym = CT.CDISCSynonym(_content=cl_dict["synonyms"])
        cl.PreferredTerm = CT.PreferredTerm(_content=cl_dict["preferred_term"])

    def _create_codelist(self, row):
        """
        create a CodeList for a codelist row
        :param row: dictionary for one codelist row of the terminology file
        :return: tuple of the odmlib CodeList object and a dictionary with the values added when it is completed
        """
        cl = CT.CodeList(
            OID="CL." + row["Code"] + "." + row["CDISC Submission Value"],
            Name=row["CDISC Synonym(s)"],
            DataType="text",
            ExtCodeID=row["Code"],
            CodeListExtensible=row["Codelist Extensible (Yes/No)"]
        )
        cl.Description = CT.Description()
        cl.Description.TranslatedText.append(CT.TranslatedText(_content=row["CDISC Definition"], lang="en"))
        cl_dict = {
            "sub_val": row["CDISC Submission Value"],
            "synonyms": row["CDISC Synonym(s)"],
            "preferred_term": row["NCI Preferred Term"]
        }
        return cl, cl_dict

    def _get_synonyms(self, synonyms_string):
        """
        split a semicolon-delimited synonyms string into a list of synonyms
        :param synonyms_string: str - synonyms separated by semicolons
        :return: list of synonyms with surrounding whitespace removed
        """
        return [synonym.strip() for synonym in synonyms_string.split(";")]

    def _create_odm(self):
        """
        create the root ODM object
        :return: odmlib ODM object
        """
        odm = CT.ODM(
            FileOID="CDISC_CT." + self.standard + "." + self.pkg_date,
            AsOfDateTime=self.pkg_date + "T00:00:00",
            CreationDateTime=self._set_datetime(),
            ODMVersion="1.3.2",
            FileType="Snapshot",
            Granularity="Metadata",
            Originator="Sam Hume",
            SourceSystem="NCI Thesaurus",
            SourceSystemVersion=self.pkg_date
        )
        return odm

    def _create_study(self):
        """
        create the Study object with GlobalVariables built from the standard name and package date
        :return: odmlib Study object
        """
        study = CT.Study(OID="CDISC_CT." + self.standard + "." + self.pkg_date)
        gv = CT.GlobalVariables()
        gv.StudyName = CT.StudyName(_content="CDISC " + self.standard + " Controlled Terminology")
        gv.StudyDescription = CT.StudyDescription(_content="CDISC " + self.standard + " Controlled Terminology, " + self.pkg_date)
        gv.ProtocolName = CT.ProtocolName(_content="CDISC " + self.standard + " Controlled Terminology")
        study.GlobalVariables = gv
        return study

    def _create_mdv(self):
        """
        create the MetaDataVersion ODMLIB object and return it
        :return: odmlib MetaDataVersion object
        """
        mdv = CT.MetaDataVersion(
            OID="CDISC_CT_MetaDataVersion." + self.standard + "." + self.pkg_date,
            Name="CDISC " + self.standard + " Controlled Terminology",
            Description="CDISC " + self.standard + " Controlled Terminology, " + self.pkg_date,
        )
        return mdv

    def _set_datetime(self):
        """return the current UTC datetime in ISO 8601 format"""
        # datetime.utcnow() is deprecated; now(timezone.utc) yields the same timezone-aware value
        return datetime.datetime.now(datetime.timezone.utc).isoformat()
123 |
if __name__ == '__main__':
    # convert ./data/sdtm-ct.txt into the CT-XML ODM file ./data/sdtm-ct.xml
    converter = CT2ODM(
        csv_file="./data/sdtm-ct.txt",
        odm_file="./data/sdtm-ct.xml",
        standard="SDTM",
        package_date="2021-06-25",
    )
    converter.create()
127 |
--------------------------------------------------------------------------------
/ct2odm/data/sdtm-ct.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/ct2odm/data/sdtm-ct.xls
--------------------------------------------------------------------------------
/define2-1-to-xlsx/README.md:
--------------------------------------------------------------------------------
1 | # define2-1-to-xlsx
2 |
3 | ## Introduction
4 | The define2-1-to-xlsx program is an odmlib example application that generates an Excel spreadsheet that contains the
5 | content of a Define-XML v2.1 file. The Excel spreadsheet version of the metadata makes it easier for many to edit or create new
6 | content to include in a Define-XML v2.1 file. The companion xlsx2define2-1 program takes the updated spreadsheet and
7 | generates a Define-XML v2.1 file. This example demonstrates some basic odmlib features.
8 |
9 | ## Getting Started
10 | To run define2-1-to-xlsx.py from the command-line:
11 |
12 | `python define2-1-to-xlsx.py -d ./data/sdtm-xls-define.xml -p ./data/`
13 |
14 | The odmlib package must be installed to run define2-1-to-xlsx. See the
15 | [odmlib repository](https://github.com/swhume/odmlib) to get the source code and the latest version of the odmlib
16 | package. You may also install odmlib from PyPi with the understanding that it is still in development so might
17 | not have everything available in the odmlib repository. To install from PyPi:
18 |
19 | `pip install odmlib`
20 |
21 | The odmlib README provides instructions for getting started.
22 |
23 | ## Limitations
24 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib.
25 | The examples are not complete, production ready applications. However, I'm happy to update these applications to
26 | accommodate new features or bug fixes and will also review pull requests.
27 |
28 | The odmlib package is still in development.
--------------------------------------------------------------------------------
/define2-1-to-xlsx/codelists.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class CodeLists:
    """ write the codelists in a MetaDataVersion to codelists.csv, one row per enumerated or coded item """
    HEADERS = ["OID", "Name", "NCI Codelist Code", "Data Type", "Order", "Term", "NCI Term Code", "Decoded Value",
               "Comment", "IsNonStandard", "StandardOID"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: odmlib MetaDataVersion object to read CodeList content from
        :param data_path: str - directory the codelists.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "codelists.csv")

    def extract(self):
        """ write the header row followed by the item rows for each codelist """
        with open(self.file_name, 'w', newline='') as csv_out:
            row_writer = csv.writer(csv_out, dialect="excel")
            row_writer.writerow(self.HEADERS)
            for codelist in self.mdv.CodeList:
                # enumerated items take precedence; a codelist with neither kind of item writes no rows
                if codelist.EnumeratedItem:
                    self._write_enumerated_item_row(codelist, row_writer)
                    continue
                if codelist.CodeListItem:
                    self._write_code_list_item_row(codelist, row_writer)

    def _write_enumerated_item_row(self, cl, writer):
        """ write one row per EnumeratedItem; the decoded value column is left empty """
        shared = self._conditional_codelist_content(cl)
        for item in cl.EnumeratedItem:
            order = item.OrderNumber if item.OrderNumber else ""
            term_c_code = item.Alias[0].Name if item.Alias else ""
            writer.writerow([
                cl.OID, cl.Name, shared["cl_c_code"], cl.DataType, order, item.CodedValue, term_c_code, "",
                shared["comment_oid"], shared["is_non_std"], shared["standard_oid"],
            ])

    def _write_code_list_item_row(self, cl, writer):
        """ write one row per CodeListItem, including the first TranslatedText of its Decode """
        shared = self._conditional_codelist_content(cl)
        for item in cl.CodeListItem:
            order = item.OrderNumber if item.OrderNumber else ""
            term_c_code = item.Alias[0].Name if item.Alias else ""
            decoded = item.Decode.TranslatedText[0]._content
            writer.writerow([
                cl.OID, cl.Name, shared["cl_c_code"], cl.DataType, order, item.CodedValue, term_c_code,
                decoded, shared["comment_oid"], shared["is_non_std"], shared["standard_oid"],
            ])

    def _conditional_codelist_content(self, cl):
        """ return the optional codelist-level values, using an empty string for any that are not set """
        return {
            "cl_c_code": cl.Alias[0].Name if cl.Alias else "",
            "comment_oid": cl.CommentOID if cl.CommentOID else "",
            "is_non_std": cl.IsNonStandard if cl.IsNonStandard else "",
            "standard_oid": cl.StandardOID if cl.StandardOID else "",
        }
63 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/comments.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Comments:
    """ write the CommentDef elements in a MetaDataVersion to comments.csv """
    HEADERS = ["OID", "Description", "Document", "Pages"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: odmlib MetaDataVersion object to read CommentDef content from
        :param data_path: str - directory the comments.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "comments.csv")

    def extract(self):
        """ write the header row followed by one row per CommentDef """
        with open(self.file_name, 'w', newline='') as csv_out:
            row_writer = csv.writer(csv_out, dialect="excel")
            row_writer.writerow(self.HEADERS)
            for comment_def in self.mdv.CommentDef:
                document_id, pages = "", ""
                # only the first DocumentRef, and its first PDFPageRef, are exported
                if comment_def.DocumentRef:
                    first_ref = comment_def.DocumentRef[0]
                    document_id = first_ref.leafID
                    if first_ref.PDFPageRef:
                        pages = first_ref.PDFPageRef[0].PageRefs
                # collapse line breaks and repeated whitespace in the description to single spaces
                raw_text = comment_def.Description.TranslatedText[0]._content
                description = " ".join(raw_text.split())
                row_writer.writerow([comment_def.OID, description, document_id, pages])
26 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/data/odmlib-define-metadata.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/define2-1-to-xlsx/data/odmlib-define-metadata.xlsx
--------------------------------------------------------------------------------
/define2-1-to-xlsx/datasets.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Datasets:
    """ write the ItemGroupDef elements in a MetaDataVersion to datasets.csv """
    HEADERS = ["OID", "Dataset", "Description", "Class", "Structure", "Purpose", "Repeating", "Reference Data", "Comment",
               "IsNonStandard", "StandardOID", "HasNoData"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: odmlib MetaDataVersion object to read ItemGroupDef content from
        :param data_path: str - directory the datasets.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "datasets.csv")

    def extract(self):
        """ write the header row followed by one row per ItemGroupDef """
        with open(self.file_name, 'w', newline='') as csv_out:
            row_writer = csv.writer(csv_out, dialect="excel")
            row_writer.writerow(self.HEADERS)
            for dataset in self.mdv.ItemGroupDef:
                # the description column holds the first TranslatedText of the dataset Description
                description = dataset.Description.TranslatedText[0]._content
                row = [
                    dataset.OID,
                    dataset.Name,
                    description,
                    dataset.Class.Name,
                    dataset.Structure,
                    dataset.Purpose,
                    dataset.Repeating,
                    dataset.IsReferenceData,
                    dataset.CommentOID,
                    dataset.IsNonStandard,
                    dataset.StandardOID,
                    dataset.HasNoData,
                ]
                row_writer.writerow(row)
22 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/define2-1-to-xlsx.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import odmlib.define_loader as OL
3 | import odmlib.loader as LD
4 | import excel_define_file as EX
5 | import odmlib.odm_parser as P
6 | import xmlschema as XSD
7 | import os
8 | import study, standards, datasets, variables, value_level as valuelevel, where_clauses as whereclauses, codelists
9 | import dictionaries, methods, comments, documents
10 |
# Worksheet names processed by Define2Xls.create, in order. Each name is also used there to find the
# extractor: the lower-cased name is the imported module and the name itself is the class in that module.
WORKSHEETS = ["Study", "Standards", "Datasets", "Variables", "ValueLevel", "WhereClauses", "CodeLists", "Dictionaries",
              "Methods", "Comments", "Documents"]
# Default file name for the generated Excel file (default of Define2Xls's excel_filename parameter).
EXCEL_NAME = "odmlib-define-metadata.xlsx"
14 |
15 | """
16 | define2-1-to-xlsx.py - an example program using odmlib to convert a Define-XML file into a metadata spreadsheet
17 | ex. cmd-line args: -d ./data/odmlib-roundtrip-define.xml -p ./data/
18 | ex. cmd-line args: -d ./data/odmlib-roundtrip-define.xml -p ./data/ -v
19 | -s "/home/sam/standards/DefineV211/schema/cdisc-define-2.1/define2-1-0.xsd"
20 | """
21 |
class Define2Xls:
    """ generate a metadata spreadsheet from a Define-XML v2.1 file """
    def __init__(self, define_file, excel_path, excel_filename=EXCEL_NAME, language="en"):
        """
        :param define_file: str - path and file name of the Define-XML input file
        :param excel_path: str - directory the worksheet files and the Excel file are written to
        :param excel_filename: str - file name of the generated Excel file
        :param language: str - language code passed to the Study worksheet
        """
        self.define_file = define_file
        self.data_path = excel_path
        self.excel_filename = excel_filename
        self.lang = language
        self.acrf = ""

    def create(self):
        """ load the Define-XML file, extract each worksheet, and combine the worksheet files into the Excel file """
        loader = LD.ODMLoader(OL.XMLDefineLoader(model_package="define_2_1", ns_uri="http://www.cdisc.org/ns/def/v2.1"))
        loader.open_odm_document(self.define_file)
        mdv_odmlib = loader.MetaDataVersion()
        study_odmlib = loader.Study()
        self._set_acrf(mdv_odmlib)
        ws_files = []
        for worksheet in WORKSHEETS:
            ws = self._create_worksheet(worksheet, study_odmlib, mdv_odmlib)
            ws.extract()
            ws_files.append(ws.file_name)
        self._write_excel(ws_files)

    def _create_worksheet(self, worksheet, study_odmlib, mdv_odmlib):
        """
        instantiate the extractor for a worksheet
        :param worksheet: str - worksheet name; its lower-cased form is the imported module name
        :param study_odmlib: odmlib Study object, needed by the Study worksheet only
        :param mdv_odmlib: odmlib MetaDataVersion object
        :return: worksheet extractor object
        """
        # look the class up by name in the imported module instead of building source text for eval
        ws_class = getattr(globals()[worksheet.lower()], worksheet)
        if worksheet == "Study":
            return ws_class(study_odmlib, mdv_odmlib, self.data_path, self.lang, self.acrf)
        return ws_class(mdv_odmlib, self.data_path)

    def _set_acrf(self, mdv):
        """
        set self.acrf to the annotated CRF leaf ID: the AnnotatedCRF DocumentRef leafID when present,
        otherwise the ID of the first leaf whose title contains "annotated" (case-insensitive)
        """
        if mdv.AnnotatedCRF.DocumentRef:
            self.acrf = mdv.AnnotatedCRF.DocumentRef.leafID
        else:
            for leaf in mdv.leaf:
                if leaf.title and "annotated" in leaf.title._content.lower():
                    self.acrf = leaf.ID
                    break

    def _write_excel(self, ws_files):
        """ create the Excel file from the extracted worksheet files """
        excel = EX.ExcelDefineFile(ws_files, WORKSHEETS, self.data_path, self.excel_filename)
        excel.create_excel()
59 |
60 |
class DefineValidator:
    """ Define-XML schema validation """
    def __init__(self, schema, define_file):
        """
        :param schema: str - the path and filename for the Define-XML schema
        :param define_file: str - the path and filename for the Define-XML to validate
        """
        self.schema_file = schema
        self.define_file = define_file

    def validate(self):
        """
        execute the schema validation and report the results
        :raises ValueError: when the schema or the Define-XML file cannot be found
        """
        # fail early with a clear message instead of an error from inside the schema parser
        self._check_file_existence()
        validator = P.ODMSchemaValidator(self.schema_file)
        try:
            validator.validate_file(self.define_file)
        except XSD.validators.exceptions.XMLSchemaValidationError as ve:
            # the base validation error also covers the children validation error reported previously
            print(f"schema validation errors: {ve}")
        else:
            print("define-XML schema validation completed successfully...")

    def _check_file_existence(self):
        """ throw an error if the schema or Define-XML file cannot be found """
        # schema_file is None when the -s argument is omitted; os.path.isfile(None) would raise a TypeError
        if not self.schema_file or not os.path.isfile(self.schema_file):
            raise ValueError("The schema validate flag is set, but the schema file cannot be found.")
        if not self.define_file or not os.path.isfile(self.define_file):
            raise ValueError("The define-xml file cannot be found.")
86 |
87 |
def set_cmd_line_args():
    """
    get the command-line arguments needed to convert the Define-XML input file into Excel
    :return: return the argparse object with the command-line parameters
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--define", help="path and file name of Define-XML v2 input file", required=True,
                        dest="define_file")
    parser.add_argument("-p", "--path", help="path to write the generated Excel file to", required=False,
                        dest="excel_path", default="./")
    parser.add_argument("-e", "--excel", help="Name of Excel file without path", required=False,
                        dest="excel_filename", default=EXCEL_NAME)
    parser.add_argument("-s", "--schema", help="path and file name of Define-XML schema", dest="schema_file")
    parser.add_argument("-v", "--validate", help="schema validate the Define-XML file", default=False, const=True,
                        nargs='?', dest="is_validate")
    parser.add_argument("-l", "--lang", help="language code", default="en", dest="language", required=False)
    args = parser.parse_args()
    # validation needs a schema; report a usage error here rather than failing later on a None schema path
    if args.is_validate and not args.schema_file:
        parser.error("-s/--schema is required when -v/--validate is set")
    return args
106 |
107 |
def main():
    """ main driver: optionally schema validate the Define-XML v2.1 file, then generate the Excel metadata file """
    cli = set_cmd_line_args()
    if cli.is_validate:
        DefineValidator(cli.schema_file, cli.define_file).validate()
    converter = Define2Xls(cli.define_file, cli.excel_path, cli.excel_filename, cli.language)
    converter.create()


if __name__ == "__main__":
    main()
120 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/dictionaries.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Dictionaries:
    """ extract the code lists that reference an external dictionary into dictionaries.csv """
    HEADERS = ["OID", "Name", "Data Type", "Dictionary", "Version"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: the odmlib MetaDataVersion object to read the CodeList elements from
        :param data_path: str - the directory the CSV file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "dictionaries.csv")

    def extract(self):
        """ write the header row followed by one row for each CodeList with an ExternalCodeList Dictionary """
        # written as utf8 because ExcelDefineFile reads the CSV files back as utf8
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for cl in self.mdv.CodeList:
                if cl.ExternalCodeList.Dictionary:
                    self._write_external_code_list_row(cl, writer)

    def _write_external_code_list_row(self, cl, writer):
        """ write the row for one external code list; a missing Version is written as an empty cell """
        ext_cl = cl.ExternalCodeList
        writer.writerow([cl.OID, cl.Name, cl.DataType, ext_cl.Dictionary, ext_cl.Version or ""])
27 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/documents.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Documents:
    """ extract the leaf (document) elements into documents.csv """
    HEADERS = ["ID", "Title", "Href"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: the odmlib MetaDataVersion object to read the leaf elements from
        :param data_path: str - the directory the CSV file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "documents.csv")

    def extract(self):
        """ write the header row followed by one row per leaf """
        # written as utf8 because ExcelDefineFile reads the CSV files back as utf8
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for lf in self.mdv.leaf:
                # a leaf without a title gets an empty Title cell instead of aborting the extract
                title = lf.title._content if lf.title else ""
                writer.writerow([lf.ID, title, lf.href])
19 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/excel_define_file.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import xlsxwriter as XLS
3 | import os
4 |
5 |
class ExcelDefineFile:
    """ combine a list of CSV files into one Excel workbook with a worksheet per CSV file """
    def __init__(self, files, tabs, data_path, excel_filename):
        """
        :param files: list of str - the CSV files to load, one per worksheet
        :param tabs: list of str - the worksheet names, in the same order as files
        :param data_path: str - the directory the Excel workbook is written to
        :param excel_filename: str - the name of the Excel workbook without a path
        """
        self.xlsx_file = os.path.join(data_path, excel_filename)
        self.files = files
        self.tabs = tabs

    def create_excel(self):
        """
        write each CSV file to its own worksheet; the first row is formatted as a header
        :return: int - the number of worksheets in the workbook
        """
        workbook = XLS.Workbook(self.xlsx_file, {"strings_to_numbers": False})
        header_format = workbook.add_format({"bold": True, "bg_color": "#CCFFFF", "border": True, "border_color": "black"})
        for index, csv_file in enumerate(self.files):
            worksheet = workbook.add_worksheet(self.tabs[index])
            # tracked outside the loop so the error message below never references an unbound name
            row_num = 0
            try:
                # newline='' lets the csv module handle line endings embedded in quoted fields
                with open(csv_file, 'rt', encoding='utf8', newline='') as f:
                    for row_num, row in enumerate(csv.reader(f)):
                        is_header_row = row_num == 0
                        for col_num, value in enumerate(row):
                            if is_header_row:
                                worksheet.write(row_num, col_num, value, header_format)
                                # set_column takes (first_col, last_col, width): widen just this column
                                worksheet.set_column(col_num, col_num, 30)
                            else:
                                worksheet.write(row_num, col_num, value)
            except UnicodeDecodeError as ue:
                print(f"Encoding error reading {csv_file} after row {row_num}: {ue}")
        workbook.close()
        return len(workbook.sheetnames)
33 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/methods.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Methods:
    """ extract the MethodDef elements into methods.csv """
    HEADERS = ["OID", "Name", "Type", "Description", "Expression Context", "Expression Code", "Document", "Pages"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: the odmlib MetaDataVersion object to read the MethodDef elements from
        :param data_path: str - the directory the CSV file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "methods.csv")

    def extract(self):
        """ write the header row followed by one row per MethodDef

        Only the first FormalExpression, DocumentRef, and PDFPageRef of a method are written.
        """
        # written as utf8 because ExcelDefineFile reads the CSV files back as utf8
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for md in self.mdv.MethodDef:
                context = ""
                code = ""
                if md.FormalExpression:
                    expression = md.FormalExpression[0]
                    context = expression.Context
                    code = expression._content
                leaf_id = ""
                page_refs = ""
                if md.DocumentRef:
                    doc_ref = md.DocumentRef[0]
                    leaf_id = doc_ref.leafID
                    # a DocumentRef may reference a whole document, i.e. carry no PDFPageRef
                    if doc_ref.PDFPageRef:
                        page_refs = doc_ref.PDFPageRef[0].PageRefs
                # collapse line breaks and repeated whitespace so the description fits in one cell
                description = " ".join(md.Description.TranslatedText[0]._content.split())
                writer.writerow([md.OID, md.Name, md.Type, description, context, code,
                                 leaf_id, page_refs])
31 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/requirements.txt:
--------------------------------------------------------------------------------
1 | odmlib>=0.1.4
2 | xmlschema>=1.10.0
3 | XlsxWriter>=3.0.3
4 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/standards.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Standards:
    """ extract the Standard elements into standards.csv """
    HEADERS = ["OID", "Name", "Type", "Publishing Set", "Version", "Status", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: the odmlib MetaDataVersion object to read the Standards from
        :param data_path: str - the directory the CSV file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "standards.csv")

    def extract(self):
        """ write the header row followed by one row per Standard; unset optional values become empty cells """
        # written as utf8 because ExcelDefineFile reads the CSV files back as utf8
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for std in self.mdv.Standards.Standard:
                pset = std.PublishingSet or ""
                status = std.Status or ""
                comment = std.CommentOID or ""
                writer.writerow([std.OID, std.Name, std.Type, pset, std.Version, status, comment])
28 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/study.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Study:
    """ extract the study-level attributes into study.csv """
    HEADERS = ["Attribute", "Value"]

    def __init__(self, odmlib_study, odmlib_mdv, data_path, language="en", acrf="LF.acrf"):
        """
        :param odmlib_study: the odmlib Study object to read the GlobalVariables from
        :param odmlib_mdv: the odmlib MetaDataVersion object
        :param data_path: str - the directory the CSV file is written to
        :param language: str - the language code written to the Language row
        :param acrf: str - the leaf ID written to the Annotated CRF row
        """
        self.study = odmlib_study
        self.mdv = odmlib_mdv
        self.path = data_path
        self.acrf = acrf
        self.language = language
        self.file_name = os.path.join(self.path, "study.csv")

    def extract(self):
        """ print the study name, then write the header row and one Attribute/Value row per study attribute """
        global_variables = self.study.GlobalVariables
        print(f"Study OID: {global_variables.StudyName}")
        # written as utf8 because ExcelDefineFile reads the CSV files back as utf8
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            writer.writerow(["StudyName", global_variables.StudyName])
            writer.writerow(["StudyDescription", global_variables.StudyDescription])
            writer.writerow(["ProtocolName", global_variables.ProtocolName])
            writer.writerow(["Language", self.language])
            writer.writerow(["Annotated CRF", self.acrf])
26 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/value_level.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class ValueLevel:
    """ extract the value level metadata (ValueListDef ItemRefs and their ItemDefs) into valuelevel.csv """
    # does not include the IsNonStandard and HasNoData attributes for value level ItemRefs
    HEADERS = ["OID", "Order", "Dataset", "Variable", "ItemOID", "Where Clause", "Data Type", "Length",
               "Significant Digits", "Format", "Mandatory", "Codelist", "Origin Type", "Origin Source", "Pages",
               "Method", "Predecessor", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: the odmlib MetaDataVersion object to read the value level metadata from
        :param data_path: str - the directory the CSV file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "valuelevel.csv")

    def extract(self):
        """
        write the header row followed by one row per ItemRef in each ValueListDef
        :raises ValueError: when a dataset or an ItemDef referenced by the value level metadata is not found
        """
        # written as utf8 because ExcelDefineFile reads the CSV files back as utf8
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for vl in self.mdv.ValueListDef:
                dataset = self._get_dataset_name(vl.OID)
                for ir in vl.ItemRef:
                    # assumes all ItemDefs are referenced by an ItemRef
                    ird = self._load_item_ref(ir)
                    idd = self._load_item_def(ir.ItemOID)
                    writer.writerow([vl.OID, ird["Order"], dataset, idd["Variable"], ir.ItemOID, ird["Where Clause"],
                                     idd["Data Type"], idd["Length"], idd["Significant Digits"], idd["Format"],
                                     ird["Mandatory"], idd["Codelist"], idd["Origin Type"], idd["Origin Source"],
                                     idd["Pages"], ird["Method"], idd["Predecessor"], idd["Comment"]])

    def _get_dataset_name(self, vl_oid):
        """
        find the dataset that contains the variable the value list is attached to
        :param vl_oid: str - the OID of the ValueListDef
        :return: str - the Name of the first ItemGroupDef referencing an ItemDef whose ValueListRef matches
        :raises ValueError: when no such ItemGroupDef exists
        """
        for item in self.mdv.ItemDef:
            if item.ValueListRef and item.ValueListRef.ValueListOID == vl_oid:
                for igd in self.mdv.ItemGroupDef:
                    ir = igd.find("ItemRef", "ItemOID", item.OID)
                    if ir:
                        return igd.Name
        raise ValueError(f"Dataset for ValueListDef {vl_oid} not found in the Define-XML file")

    def _load_item_ref(self, ir):
        """ return the ItemRef values used in a value level row as a dictionary keyed by column header """
        return {
            "Order": ir.OrderNumber,
            "Mandatory": ir.Mandatory,
            "Method": ir.MethodOID,
            "Where Clause": self._get_where_clause_oid(ir),
        }

    def _load_item_def(self, item_oid):
        """
        return the ItemDef values used in a value level row as a dictionary keyed by column header
        :param item_oid: str - the OID of the ItemDef referenced by the value level ItemRef
        :raises ValueError: when the ItemDef does not exist in the MetaDataVersion
        """
        it = self.mdv.find("ItemDef", "OID", item_oid)
        if not it:
            raise ValueError(f"ItemDef {item_oid} not found in the Define-XML file")
        # TODO add support for multiple Origins
        origin = it.Origin[0] if it.Origin else None
        idd = {}
        idd["Variable"] = it.Name
        idd["Data Type"] = it.DataType
        idd["Length"] = it.Length
        idd["Significant Digits"] = it.SignificantDigits
        idd["Format"] = it.DisplayFormat
        idd["Codelist"] = it.CodeListRef.CodeListOID if it.CodeListRef else ""
        idd["Origin Type"] = origin.Type if origin else ""
        idd["Origin Source"] = origin.Source if origin and origin.Source else ""
        idd["Pages"] = origin.DocumentRef[0].PDFPageRef[0].PageRefs \
            if origin and origin.DocumentRef and origin.DocumentRef[0].PDFPageRef else ""
        # a Predecessor origin without a description yields an empty cell rather than an IndexError
        idd["Predecessor"] = origin.Description.TranslatedText[0]._content \
            if origin and origin.Type == "Predecessor" and origin.Description.TranslatedText else ""
        idd["Comment"] = it.CommentOID if it.CommentOID else ""
        return idd

    def _get_where_clause_oid(self, item):
        """ return the WhereClauseOIDs of an ItemRef joined into one cell value """
        # multiple OIDs are separated by a single quote character; separator kept unchanged
        # NOTE(review): confirm the program that reads the spreadsheet splits on the same character
        return "'".join(wc.WhereClauseOID for wc in item.WhereClauseRef)
--------------------------------------------------------------------------------
/define2-1-to-xlsx/variables.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Variables:
    """ extract the variable metadata (ItemGroupDef ItemRefs and their ItemDefs) into variables.csv """
    HEADERS = ["OID", "Order", "Dataset", "Variable", "Label", "Data Type", "Length", "Significant Digits", "Format",
               "KeySequence", "Mandatory", "CodeList", "Valuelist", "Origin Type", "Origin Source", "Pages", "Method",
               "Predecessor", "Role", "Comment", "IsNonStandard", "HasNoData"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: the odmlib MetaDataVersion object to read the variable metadata from
        :param data_path: str - the directory the CSV file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "variables.csv")

    def extract(self):
        """
        write the header row followed by one row per ItemRef in each ItemGroupDef
        :raises ValueError: when an ItemDef referenced by an ItemRef is not found
        """
        # written as utf8 because ExcelDefineFile reads the CSV files back as utf8
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for ig in self.mdv.ItemGroupDef:
                for ir in ig.ItemRef:
                    # assumes all ItemDefs are referenced by an ItemRef
                    ird = self._load_item_ref(ir)
                    idd = self._load_item_def(ir.ItemOID)
                    writer.writerow([idd["OID"], ird["Order"], ig.Name, idd["Variable"], idd["Label"],
                                     idd["Data Type"], idd["Length"], idd["Significant Digits"], idd["Format"],
                                     ird["KeySequence"], ird["Mandatory"], idd["Codelist"], idd["Valuelist"],
                                     idd["Origin Type"], idd["Origin Source"], idd["Pages"], ird["Method"],
                                     idd["Predecessor"], ird["Role"], idd["Comment"], ird["IsNonStandard"],
                                     ird["HasNoData"]])

    def _load_item_ref(self, ir):
        """ return the ItemRef values used in a variable row as a dictionary """
        return {
            "Order": ir.OrderNumber,
            "Mandatory": ir.Mandatory,
            "KeySequence": ir.KeySequence,
            "Method": ir.MethodOID,
            "Role": ir.Role,
            "IsNonStandard": ir.IsNonStandard if ir.IsNonStandard else "",
            "HasNoData": ir.HasNoData if ir.HasNoData else "",
        }

    def _load_item_def(self, item_oid):
        """
        return the ItemDef values used in a variable row as a dictionary
        :param item_oid: str - the OID of the ItemDef referenced by the ItemRef
        :raises ValueError: when the ItemDef does not exist in the MetaDataVersion
        """
        it = self.mdv.find("ItemDef", "OID", item_oid)
        if not it:
            raise ValueError(f"ItemDef {item_oid} not found in the Define-XML file")
        # TODO add support for multiple Origins
        origin = it.Origin[0] if it.Origin else None
        idd = {}
        idd["OID"] = item_oid
        idd["Variable"] = it.Name
        idd["Data Type"] = it.DataType
        idd["Length"] = it.Length
        idd["Significant Digits"] = it.SignificantDigits
        idd["Format"] = it.DisplayFormat
        # collapse line breaks and repeated whitespace; an ItemDef without a description gets an empty label
        idd["Label"] = " ".join(it.Description.TranslatedText[0]._content.split()) \
            if it.Description.TranslatedText else ""
        idd["Codelist"] = it.CodeListRef.CodeListOID if it.CodeListRef else ""
        idd["Valuelist"] = it.ValueListRef.ValueListOID if it.ValueListRef else ""
        idd["Origin Type"] = origin.Type if origin else ""
        idd["Origin Source"] = origin.Source if origin and origin.Source else ""
        idd["Pages"] = origin.DocumentRef[0].PDFPageRef[0].PageRefs \
            if origin and origin.DocumentRef and origin.DocumentRef[0].PDFPageRef else ""
        # a Predecessor origin without a description yields an empty cell rather than an IndexError
        idd["Predecessor"] = origin.Description.TranslatedText[0]._content \
            if origin and origin.Type == "Predecessor" and origin.Description.TranslatedText else ""
        idd["Comment"] = it.CommentOID if it.CommentOID else ""
        return idd
62 |
--------------------------------------------------------------------------------
/define2-1-to-xlsx/where_clauses.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class WhereClauses:
    """ extract the WhereClauseDef range checks into whereclauses.csv """
    HEADERS = ["OID", "Dataset", "Variable", "Comparator", "Value", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: the odmlib MetaDataVersion object to read the WhereClauseDef elements from
        :param data_path: str - the directory the CSV file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "whereclauses.csv")

    def extract(self):
        """
        write the header row followed by one row per RangeCheck in each WhereClauseDef
        :raises ValueError: when the dataset or ItemDef for a RangeCheck cannot be found
        """
        # written as utf8 because ExcelDefineFile reads the CSV files back as utf8
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for wc in self.mdv.WhereClauseDef:
                comment_oid = wc.CommentOID or ""
                for rc in wc.RangeCheck:
                    dataset = self._get_dataset_name(rc.ItemOID)
                    variable_name = self._get_variable_name(rc.ItemOID)
                    value = self._load_check_values(rc)
                    # TODO fix the multiple level RC with join
                    writer.writerow([wc.OID, dataset, variable_name, rc.Comparator, value, comment_oid])

    def _get_dataset_name(self, item_oid):
        """
        :param item_oid: str - the ItemOID referenced by a RangeCheck
        :return: str - the Name of the first ItemGroupDef with an ItemRef to the item
        :raises ValueError: when no ItemGroupDef references the item
        """
        for igd in self.mdv.ItemGroupDef:
            ir = igd.find("ItemRef", "ItemOID", item_oid)
            if ir:
                return igd.Name
        raise ValueError(f"Dataset for ItemRef {item_oid} not found in the Define-XML file")

    def _get_variable_name(self, item_oid):
        """
        :param item_oid: str - the ItemOID referenced by a RangeCheck
        :return: str - the Name of the matching ItemDef
        :raises ValueError: when the ItemDef does not exist
        """
        item = self.mdv.find("ItemDef", "OID", item_oid)
        if not item:
            raise ValueError(f"ItemDef for ItemRef {item_oid} not found in the Define-XML file")
        return item.Name

    def _load_check_values(self, rc):
        """ return the CheckValue contents of a RangeCheck as a comma-separated string; empty values are kept """
        return ",".join(cv._content if cv._content else "" for cv in rc.CheckValue)
51 |
--------------------------------------------------------------------------------
/define2xls/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /../../../../../../../:\Users\shume\Dropbox\odmlib_examples\define2xls\.idea/dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/define2xls/.idea/define2xls.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/define2xls/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/define2xls/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/define2xls/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/define2xls/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/define2xls/README.md:
--------------------------------------------------------------------------------
1 | # define2xls
2 |
3 | ## Introduction
4 | Use the define2-1-to-xlsx example instead of this one. The Define-XML v2.1 examples are getting more use and testing
5 | creating study Define-XML files, so they're getting updated more frequently.
6 |
7 | The define2xls program is an odmlib example application that generates an Excel spreadsheet that contains the content
8 | of a Define-XML v2.0 file. The Excel spreadsheet version of the metadata makes it easier for many to edit or create new content
9 | to include in a Define-XML v2.0 file. The companion xls2define program takes the updated spreadsheet and generates a
10 | Define-XML file. This example demonstrates some basic odmlib
11 | features.
12 |
13 | ## Getting Started
14 | To run define2xls.py from the command-line:
15 |
16 | `python define2xls.py -d ./data/sdtm-xls-define.xml -p ./data/`
17 |
18 | The odmlib package must be installed to run define2xls. See the [odmlib repository](https://github.com/swhume/odmlib)
19 | to get the odmlib package. Eventually, it may make its way into PyPI, but for now you'll need to install from the
20 | source. The odmlib README provides instructions for getting started.
21 |
22 | ## Limitations
23 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib.
24 | The examples are not complete, production ready applications.
25 |
26 | The odmlib package is still in development. Although odmlib supports all of ODM more work remains
27 | to complete all features for processing ClinicalData. The initial focus has been on getting
28 | the metadata sections complete.
--------------------------------------------------------------------------------
/define2xls/codelists.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class CodeLists:
    """ extract the enumerated and decoded code list terms into codelists.csv """
    HEADERS = ["OID", "Name", "NCI Codelist Code", "Data Type", "Order", "Term", "NCI Term Code", "Decoded Value"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: the odmlib MetaDataVersion object to read the CodeList elements from
        :param data_path: str - the directory the CSV file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "codelists.csv")

    def extract(self):
        """ write the header row followed by one row per term; code lists with neither item kind are skipped """
        with open(self.file_name, 'w', newline='') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for code_list in self.mdv.CodeList:
                if code_list.EnumeratedItem:
                    self._write_enumerated_item_row(code_list, writer)
                    continue
                if code_list.CodeListItem:
                    self._write_code_list_item_row(code_list, writer)

    @staticmethod
    def _first_alias_name(element):
        """ return the Name of the element's first Alias, or an empty string when it has no Alias """
        return element.Alias[0].Name if element.Alias else ""

    def _write_enumerated_item_row(self, cl, writer):
        """ write one row per EnumeratedItem; enumerated terms have no decode so the last cell is empty """
        list_code = self._first_alias_name(cl)
        for term in cl.EnumeratedItem:
            order = term.OrderNumber if term.OrderNumber else ""
            term_code = self._first_alias_name(term)
            writer.writerow([cl.OID, cl.Name, list_code, cl.DataType, order, term.CodedValue, term_code, ""])

    def _write_code_list_item_row(self, cl, writer):
        """ write one row per CodeListItem, including the content of its first Decode TranslatedText """
        list_code = self._first_alias_name(cl)
        for term in cl.CodeListItem:
            order = term.OrderNumber if term.OrderNumber else ""
            term_code = self._first_alias_name(term)
            decoded = term.Decode.TranslatedText[0]._content
            writer.writerow([cl.OID, cl.Name, list_code, cl.DataType, order, term.CodedValue, term_code, decoded])
49 |
--------------------------------------------------------------------------------
/define2xls/comments.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Comments:
    """ extract the CommentDef elements into comments.csv """
    HEADERS = ["OID", "Description", "Document", "Pages"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: the odmlib MetaDataVersion object to read the CommentDef elements from
        :param data_path: str - the directory the CSV file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "comments.csv")

    def extract(self):
        """ write the header row followed by one row per CommentDef

        Only the first DocumentRef and its first PDFPageRef are written; both cells are empty when absent.
        """
        with open(self.file_name, 'w', newline='') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for comment_def in self.mdv.CommentDef:
                document, pages = "", ""
                if comment_def.DocumentRef:
                    first_ref = comment_def.DocumentRef[0]
                    document = first_ref.leafID
                    if first_ref.PDFPageRef:
                        pages = first_ref.PDFPageRef[0].PageRefs
                # collapse line breaks and repeated whitespace so the text fits in one cell
                words = comment_def.Description.TranslatedText[0]._content.split()
                writer.writerow([comment_def.OID, " ".join(words), document, pages])
26 |
--------------------------------------------------------------------------------
/define2xls/data/comments.csv:
--------------------------------------------------------------------------------
1 | OID,Description,Document,Pages
2 | COM.AGEU,Defaulted to YEARS,,
3 | COM.ARM,Assigned from TA.ARM based on ARMCD.,,
4 | COM.ARMCD,Assigned based on Randomization Number. See Note 2.1,LF.ReviewersGuide,
5 | COM.CMCLAS,Coded to ATC level 3 Term based on CMINDC,,
6 | COM.CMCLASCD,Coded to ATC level 3 Code based on CMINDC,,
7 | COM.CMROUTE,Free text from CRF mapped to CDISC CT,,
8 | COM.DOMAIN.DM,"See Reviewer's Guide, Section 2.1 Demographics",LF.ReviewersGuide,section2.1
9 | COM.DOMAIN.QS,"QS is submitted as a split dataset. The split was done based on QSCAT as QSCG (CLINICAL GLOBAL IMPRESSIONS), QSCS (CORNELL SCALE FOR DEPRESSION INDEMENTIA) and QSMM (MINI MENTAL STATE EXAMINATION). See additional documentation in the Reviewer's Guide, Split Datasets Section.",LF.ReviewersGuide,
10 | COM.DSDECOD,CRF controlled terminology was mapped to match CDISC controlled terminology.,,
11 | COM.EG.VISITNUM,Assigned from the TV domain based on the VISIT,,
12 | COM.EGEVAL,Equal to INVESTIGATOR for CRF data,,
13 | COM.EGPOS,Equal to SUPINE,,
14 | COM.EGSPID,ECG parameter ordering variable,,
15 | COM.IDVAR,Name of the variables for the related records.,,
16 | COM.IDVARVAL,Value of identifying variable described in IDVAR.,,
17 | COM.IE.VISITNUM,Assigned from the TV domain based on the VISIT,,
18 | COM.LBREFID,Accession number,,
19 | COM.MHBODSYS,Assigned for Medical History but not Psychiatric History,,
20 | COM.MHENRF,CRF controlled terminology was mapped to match CDISC controlled terminology.,,
21 | COM.PE.VISITNUM,Assigned from the TV domain based on the VISIT,,
22 | COM.QS.VISITNUM,Assigned from the TV domain based on the VISIT,,
23 | COM.RELTYPE,All values are null since this is used only when identifying a dataset-level relationship.,,
24 | COM.STUDY.DATA,The data submitted only includes subjects in the USA since other sites did not enroll any subjects.,,
25 | COM.SUBJECTDATA-JOIN-DM,"Join any Subject Level dataset with the Demographics dataset based on [IG.datasetname]IT.USUBJID = [IG.DM]IT.USUBJID, assuming 'IG.datasetname' is the OID of the ItemGroupDef that defines the subject-level dataset to be joined with the Demographics dataset.",,
26 | COM.SUPPQS.QVAL.RTRINIT,QSMM-CRF Page 13; QSCS-CRF Pages 14; QSCG-CRF Page 17,,
27 | COM.VS.VISITNUM,Assigned from the TV domain based on the VISIT,,
28 | COM.VSSTRESU,Standard units consistent with CDISC controlled terminology,,
29 |
--------------------------------------------------------------------------------
/define2xls/data/datasets.csv:
--------------------------------------------------------------------------------
1 | Dataset,Description,Class,Structure,Purpose,Repeating,Reference Data,Comment
2 | AE,Adverse Events,EVENTS,One record per adverse event per subject,Tabulation,Yes,No,
3 | CM,Concomitant Medications,INTERVENTIONS,One record per recorded medication occurrence or constant-dosing interval per subject,Tabulation,Yes,No,
4 | DA,Drug Accountability,FINDINGS,One record per drug accountability finding per subject,Tabulation,Yes,No,
5 | DM,Demographics,SPECIAL PURPOSE,One record per subject,Tabulation,No,No,COM.DOMAIN.DM
6 | DS,Disposition,EVENTS,One record per disposition status or protocol milestone per subject,Tabulation,Yes,No,
7 | EG,ECG Test Results,FINDINGS,One record per ECG observation per visit per subject,Tabulation,Yes,No,
8 | EX,Exposure,INTERVENTIONS,One record per constant dosing interval per subject,Tabulation,Yes,No,
9 | IE,Inclusion/Exclusion Criteria Not Met,FINDINGS,One record per inclusion/exclusion criterion not met per subject,Tabulation,Yes,No,
10 | LB,Laboratory Tests Results,FINDINGS,One record per analyte per visit per subject,Tabulation,Yes,No,
11 | MH,Medical History,EVENTS,One record per medical history event per subject,Tabulation,Yes,No,
12 | PE,Physical Examination,FINDINGS,One record per body system or abnormality per visit per subject,Tabulation,Yes,No,
13 | QSCG,Questionnaire-QSCG,FINDINGS,One record per questionnaire per question per visit per subject,Tabulation,Yes,No,COM.DOMAIN.QS
14 | QSCS,Questionnaire-QSCS,FINDINGS,One record per questionnaire per question per visit per subject,Tabulation,Yes,No,COM.DOMAIN.QS
15 | QSMM,Questionnaire-QSMM,FINDINGS,One record per questionnaire per question per visit per subject,Tabulation,Yes,No,COM.DOMAIN.QS
16 | RELREC,Related Records,RELATIONSHIP,"One record per related record, group of records or dataset",Tabulation,Yes,No,
17 | SC,Subject Characteristics,FINDINGS,One record per characteristic per subject,Tabulation,No,No,
18 | SE,Subject Elements,SPECIAL PURPOSE,One record per actual Element per subject,Tabulation,Yes,No,
19 | SUPPAE,Supplemental Qualifiers for AE,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
20 | SUPPCM,Supplemental Qualifiers for CM,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
21 | SUPPDM,Supplemental Qualifiers for DM,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
22 | SUPPEG,Supplemental Qualifiers for EG,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
23 | SUPPEX,Supplemental Qualifiers for EX,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
24 | SUPPLB,Supplemental Qualifiers for LB,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
25 | SUPPQSCG,Supplemental Qualifiers for QSCG,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
26 | SUPPQSCS,Supplemental Qualifiers for QSCS,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
27 | SUPPQSMM,Supplemental Qualifiers for QSMM,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
28 | SUPPVS,Supplemental Qualifiers for VS,RELATIONSHIP,"One record per IDVAR, IDVARVAL, and QNAM value per subject",Tabulation,Yes,No,
29 | SV,Subject Visits,SPECIAL PURPOSE,One record per actual visit per subject,Tabulation,Yes,No,
30 | TA,Trial Arms,TRIAL DESIGN,One record per planned Element per Arm,Tabulation,No,Yes,
31 | TE,Trial Elements,TRIAL DESIGN,One record per planned Element,Tabulation,No,Yes,
32 | TI,Trial Inclusion/Exclusion Criteria,TRIAL DESIGN,One record per I/E criterion,Tabulation,No,Yes,
33 | TS,Trial Summary,TRIAL DESIGN,One record per trial summary parameter value,Tabulation,No,Yes,
34 | TV,Trial Visits,TRIAL DESIGN,One record per planned Visit per Arm,Tabulation,No,Yes,
35 | VS,Vital Signs,FINDINGS,One record per vital sign measurement per visit per subject,Tabulation,Yes,No,
36 |
--------------------------------------------------------------------------------
/define2xls/data/dictionaries.csv:
--------------------------------------------------------------------------------
1 | OID,Name,Data Type,Dictionary,Version
2 | CL.AEDICT_F,Adverse Event Dictionary,text,MEDDRA,8.0
3 | CL.DRUGDICT_F,Drug Dictionary,text,WHODRUG,200204
4 | CL.ISO3166,ISO3166,text,ISO3166,
5 |
--------------------------------------------------------------------------------
/define2xls/data/documents.csv:
--------------------------------------------------------------------------------
1 | ID,Title,Href
2 | LF.ReviewersGuide,Reviewers Guide,reviewersguide.pdf
3 | LF.ComplexAlgorithms,Complex Algorithms,complexalgorithms.pdf
4 | LF.blankcrf,Annotated Case Report Form,blankcrf.pdf
5 |
--------------------------------------------------------------------------------
/define2xls/data/odmlib-define-metadata-clean.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/define2xls/data/odmlib-define-metadata-clean.xlsx
--------------------------------------------------------------------------------
/define2xls/data/odmlib-define-metadata-save.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/define2xls/data/odmlib-define-metadata-save.xlsx
--------------------------------------------------------------------------------
/define2xls/data/odmlib-define-metadata-temp.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/define2xls/data/odmlib-define-metadata-temp.xlsx
--------------------------------------------------------------------------------
/define2xls/data/odmlib-define-metadata.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/define2xls/data/odmlib-define-metadata.xlsx
--------------------------------------------------------------------------------
/define2xls/data/study.csv:
--------------------------------------------------------------------------------
1 | Attribute,Value
2 | StudyName,CDISC01
3 | StudyDescription,CDISC Test Study
4 | ProtocolName,CDISC01
5 | StandardName,SDTM-IG
6 | StandardVersion,3.1.2
7 | Language,en
8 |
--------------------------------------------------------------------------------
/define2xls/datasets.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Datasets:
    """ write the dataset (ItemGroupDef) metadata from a MetaDataVersion to datasets.csv """
    HEADERS = ["Dataset", "Description", "Class", "Structure", "Purpose", "Repeating", "Reference Data", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose ItemGroupDef entries are exported
        :param data_path: str - directory the datasets.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "datasets.csv")

    def extract(self):
        """ write the header row plus one row per ItemGroupDef to self.file_name """
        # written as UTF-8 because ExcelDefineFile reads the CSV files back with encoding='utf8'
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for ig in self.mdv.ItemGroupDef:
                # only the first TranslatedText of the Description is exported
                writer.writerow([ig.Name, ig.Description.TranslatedText[0]._content, ig.Class, ig.Structure,
                                 ig.Purpose, ig.Repeating, ig.IsReferenceData, ig.CommentOID])
20 |
--------------------------------------------------------------------------------
/define2xls/define2xls.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import odmlib.define_loader as OL
3 | import odmlib.loader as LD
4 | import excel_define_file as EX
5 | import odmlib.odm_parser as P
6 | import xmlschema as XSD
7 | import os
8 | import study, datasets, variables, value_level as valuelevel, where_clauses as whereclauses, codelists, dictionaries
9 | import methods, comments, documents
10 |
# worksheet (tab) names in the generated workbook; Define2Xls.create also derives the module name
# (lower-cased) and the class name from each entry, so the order here is the order of the tabs
WORKSHEETS = ["Study", "Datasets", "Variables", "ValueLevel", "WhereClauses", "CodeLists", "Dictionaries", "Methods",
              "Comments", "Documents"]
# default name of the Excel file when none is given on the command line
EXCEL_NAME = "odmlib-define-metadata.xlsx"
14 |
15 | """
16 | define2xls.py - an example program using odmlib to convert a Define-XML file into a metadata spreadsheet
17 | ex. cmd-line args: -d ./data/sdtm-xls-define.xml -p ./data/
18 | ex. cmd-line args: -d C:\\Users\\shume\\Dropbox\\odm_api\\odm_360\\xls2define\\data\\sdtm-xls-define.xml -p ./data/ -v
19 | -s "C:\\Users\\shume\\Dropbox\\04. XML Tech\\Define-XML\\define_xml_2_0\\define_xml_2_0_releasepackage20140424\\schema\\cdisc-define-2.0\\define2-0-0.xsd"
20 | """
21 |
class Define2Xls:
    """ generate a metadata spreadsheet from a Define-XML v2.0 file """
    def __init__(self, define_file, excel_path, excel_filename=EXCEL_NAME, language="en"):
        """
        :param define_file: str - the path and filename of the Define-XML input file
        :param excel_path: str - directory the CSV load files and the Excel file are written to
        :param excel_filename: str - name of the Excel file without path
        :param language: str - language code passed on to the Study worksheet
        """
        self.define_file = define_file
        self.data_path = excel_path
        self.excel_filename = excel_filename
        self.lang = language

    def create(self):
        """ load the Define-XML, extract each worksheet to a CSV load file, then build the Excel file """
        loader = LD.ODMLoader(OL.XMLDefineLoader())
        loader.open_odm_document(self.define_file)
        mdv_odmlib = loader.MetaDataVersion()
        study_odmlib = loader.Study()
        ws_files = []
        for worksheet in WORKSHEETS:
            # look up the worksheet class (e.g. module "valuelevel", class "ValueLevel") among this module's
            # imports instead of building and eval-ing a source string
            ws_class = getattr(globals()[worksheet.lower()], worksheet)
            if worksheet == "Study":
                # the Study worksheet also needs the Study object and the language
                ws = ws_class(study_odmlib, mdv_odmlib, self.data_path, self.lang)
            else:
                ws = ws_class(mdv_odmlib, self.data_path)
            ws.extract()
            ws_files.append(ws.file_name)
        self._write_excel(ws_files)

    def _write_excel(self, ws_files):
        """ combine the CSV load files into one workbook with a tab per WORKSHEETS entry """
        excel = EX.ExcelDefineFile(ws_files, WORKSHEETS, self.data_path, self.excel_filename)
        excel.create_excel()
48 |
49 |
class DefineValidator:
    """ Define-XML schema validation """
    def __init__(self, schema, define_file):
        """
        :param schema: str - the path and filename for the Define-XML schema
        :param define_file: str - the path and filename for the Define-XML to validate
        """
        self.schema_file, self.define_file = schema, define_file

    def validate(self):
        """ execute the schema validation and report the results """
        schema_validator = P.ODMSchemaValidator(self.schema_file)
        try:
            schema_validator.validate_file(self.define_file)
        except XSD.validators.exceptions.XMLSchemaChildrenValidationError as ve:
            print(f"schema validation errors: {ve}")
        else:
            # only reached when validate_file raised nothing
            print("define-XML schema validation completed successfully...")

    def _check_file_existence(self):
        """ throw an error if the schema or Define-XML file cannot be found """
        # NOTE(review): nothing in this class calls this helper; validate() does not use it
        required_files = (
            (self.schema_file, "The schema validate flag is set, but the schema file cannot be found."),
            (self.define_file, "The define-xml file cannot be found."),
        )
        for file_path, message in required_files:
            if not os.path.isfile(file_path):
                raise ValueError(message)
75 |
76 |
def set_cmd_line_args():
    """
    get the command-line arguments needed to convert the Define-XML input file into Excel
    :return: return the argparse object with the command-line parameters
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-d", "--define", dest="define_file", required=True,
        help="path and file name of Define-XML v2 input file")
    cli.add_argument(
        "-p", "--path", dest="excel_path", required=False, default="./",
        help="path to write the generated Excel file to")
    cli.add_argument(
        "-e", "--excel", dest="excel_filename", required=False, default=EXCEL_NAME,
        help="Name of Excel file without path")
    cli.add_argument(
        "-s", "--schema", dest="schema_file",
        help="path and file name of Define-XML schema")
    # nargs='?' with const=True: a bare -v yields True, and -v VALUE is still accepted
    cli.add_argument(
        "-v", "--validate", dest="is_validate", nargs='?', const=True, default=False,
        help="schema validate the Define-XML file")
    cli.add_argument(
        "-l", "--lang", dest="language", required=False, default="en",
        help="language code")
    return cli.parse_args()
95 |
96 |
def main():
    """ main driver method that generates an Excel file using the Define-XML v2.0 metadata """
    cli_args = set_cmd_line_args()
    # schema validation is optional and only reports its result; conversion runs either way
    if cli_args.is_validate:
        DefineValidator(cli_args.schema_file, cli_args.define_file).validate()
    converter = Define2Xls(cli_args.define_file, cli_args.excel_path, cli_args.excel_filename, cli_args.language)
    converter.create()
105 |
106 |
# run the conversion only when this module is executed as a script
if __name__ == "__main__":
    main()
109 |
--------------------------------------------------------------------------------
/define2xls/dictionaries.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Dictionaries:
    """ write the external dictionary code lists from a MetaDataVersion to dictionaries.csv """
    HEADERS = ["OID", "Name", "Data Type", "Dictionary", "Version"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose CodeList entries are scanned
        :param data_path: str - directory the dictionaries.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "dictionaries.csv")

    def extract(self):
        """ write the header row plus one row per CodeList that has an ExternalCodeList Dictionary """
        # written as UTF-8 because ExcelDefineFile reads the CSV files back with encoding='utf8'
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for cl in self.mdv.CodeList:
                # code lists without a Dictionary value are skipped here
                if cl.ExternalCodeList.Dictionary:
                    self._write_external_code_list_row(cl, writer)

    def _write_external_code_list_row(self, cl, writer):
        """ write one dictionary row; a missing Version is written as an empty cell """
        ext_cl = cl.ExternalCodeList
        version = ext_cl.Version if ext_cl.Version else ""
        writer.writerow([cl.OID, cl.Name, cl.DataType, ext_cl.Dictionary, version])
27 |
--------------------------------------------------------------------------------
/define2xls/documents.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Documents:
    """ write the document (leaf) metadata from a MetaDataVersion to documents.csv """
    HEADERS = ["ID", "Title", "Href"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose leaf entries are exported
        :param data_path: str - directory the documents.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "documents.csv")

    def extract(self):
        """ write the header row plus one row (ID, title text, href) per leaf to self.file_name """
        # written as UTF-8 because ExcelDefineFile reads the CSV files back with encoding='utf8'
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            writer.writerows([lf.ID, lf.title._content, lf.href] for lf in self.mdv.leaf)
19 |
--------------------------------------------------------------------------------
/define2xls/excel_define_file.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import xlsxwriter as XLS
3 | import os
4 |
5 |
class ExcelDefineFile:
    """ build one Excel workbook from a list of CSV load files, one worksheet per file """
    def __init__(self, files, tabs, data_path, excel_filename):
        """
        :param files: list of CSV file names to load, in worksheet order
        :param tabs: list of worksheet names; tabs[i] names the worksheet built from files[i]
        :param data_path: str - directory the Excel file is written to
        :param excel_filename: str - name of the Excel file without path
        """
        self.xlsx_file = os.path.join(data_path, excel_filename)
        self.files = files
        self.tabs = tabs

    def create_excel(self):
        """
        copy every CSV file into its own worksheet, formatting the first row as a header
        :return: the number of worksheets in the workbook
        """
        workbook = XLS.Workbook(self.xlsx_file, {"strings_to_numbers": False})
        header_format = workbook.add_format({"bold": True, "bg_color": "#CCFFFF", "border": True, "border_color": "black"})
        for index, csv_file in enumerate(self.files):
            worksheet = workbook.add_worksheet(self.tabs[index])
            try:
                # newline='' lets the csv module handle embedded line breaks in quoted cells
                with open(csv_file, 'rt', encoding='utf8', newline='') as f:
                    for r, row in enumerate(csv.reader(f)):
                        for c, col in enumerate(row):
                            if r == 0:
                                worksheet.write(r, c, col, header_format)
                                # set_column takes (first_col, last_col, width)
                                worksheet.set_column(c, c, 30)
                            else:
                                worksheet.write(r, c, col)
            except UnicodeDecodeError as ue:
                # report the file only: row/col may not be bound yet if the very first read fails
                print(f"Encoding error reading load file {csv_file}: {ue}")
        workbook.close()
        return len(workbook.sheetnames)
33 |
--------------------------------------------------------------------------------
/define2xls/methods.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Methods:
    """ write the method (MethodDef) metadata from a MetaDataVersion to methods.csv """
    HEADERS = ["OID", "Name", "Type", "Description", "Expression Context", "Expression Code", "Document", "Pages"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose MethodDef entries are exported
        :param data_path: str - directory the methods.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "methods.csv")

    def extract(self):
        """ write the header row plus one row per MethodDef to self.file_name """
        # written as UTF-8 because ExcelDefineFile reads the CSV files back with encoding='utf8'
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for md in self.mdv.MethodDef:
                context = ""
                code = ""
                if md.FormalExpression:
                    # only the first FormalExpression is exported
                    context = md.FormalExpression[0].Context
                    code = md.FormalExpression[0]._content
                leaf_id = ""
                page_refs = ""
                if md.DocumentRef:
                    # only the first DocumentRef is exported
                    doc_ref = md.DocumentRef[0]
                    leaf_id = doc_ref.leafID
                    # a DocumentRef may reference a whole document without any PDFPageRef
                    if doc_ref.PDFPageRef:
                        page_refs = doc_ref.PDFPageRef[0].PageRefs
                # collapse line breaks and runs of whitespace in the description into single spaces
                description = " ".join(md.Description.TranslatedText[0]._content.split())
                writer.writerow([md.OID, md.Name, md.Type, description, context, code,
                                 leaf_id, page_refs])
31 |
--------------------------------------------------------------------------------
/define2xls/requirements.txt:
--------------------------------------------------------------------------------
1 | odmlib>=0.1.4
2 | xmlschema>=1.4.1
3 | XlsxWriter>=1.3.7
--------------------------------------------------------------------------------
/define2xls/study.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Study:
    """ write the study-level attributes of a Define-XML document to study.csv """
    HEADERS = ["Attribute", "Value"]

    def __init__(self, odmlib_study, odmlib_mdv, data_path, language="en"):
        """
        :param odmlib_study: Study object providing the GlobalVariables
        :param odmlib_mdv: MetaDataVersion object providing StandardName and StandardVersion
        :param data_path: str - directory the study.csv file is written to
        :param language: str - language code written to the Language row
        """
        self.study = odmlib_study
        self.mdv = odmlib_mdv
        self.path = data_path
        self.language = language
        self.file_name = os.path.join(self.path, "study.csv")

    def extract(self):
        """ write the header row plus one Attribute/Value row per study attribute to self.file_name """
        global_vars = self.study.GlobalVariables
        # the value shown is the StudyName, so label it as such rather than as an OID
        print(f"Study Name: {global_vars.StudyName}")
        # written as UTF-8 because ExcelDefineFile reads the CSV files back with encoding='utf8'
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            writer.writerow(["StudyName", global_vars.StudyName])
            writer.writerow(["StudyDescription", global_vars.StudyDescription])
            writer.writerow(["ProtocolName", global_vars.ProtocolName])
            writer.writerow(["StandardName", self.mdv.StandardName])
            writer.writerow(["StandardVersion", self.mdv.StandardVersion])
            writer.writerow(["Language", self.language])
26 |
--------------------------------------------------------------------------------
/define2xls/value_level.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class ValueLevel:
    """ write the value level metadata (ValueListDef ItemRefs and their ItemDefs) to valuelevel.csv """
    HEADERS = ["Order", "Dataset", "Variable", "Where Clause", "Data Type", "Length", "Significant Digits", "Format",
               "Mandatory", "Codelist", "Origin", "Pages", "Method", "Predecessor", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose ValueListDef entries are exported
        :param data_path: str - directory the valuelevel.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "valuelevel.csv")

    def extract(self):
        """ write the header row plus one row per ItemRef of every ValueListDef to self.file_name """
        # written as UTF-8 because ExcelDefineFile reads the CSV files back with encoding='utf8'
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for vl in self.mdv.ValueListDef:
                for ir in vl.ItemRef:
                    # assumes all ItemDefs are referenced by an ItemRef
                    ird = self._load_item_ref(ir)
                    idd = self._load_item_def(ir.ItemOID)
                    # using OID to get dataset is a hack, but dataset column only used to create the OID for VLD
                    dataset = vl.OID.split(".")[1]
                    writer.writerow([ird["Order"], dataset, idd["Variable"], ird["Where Clause"], idd["Data Type"],
                                     idd["Length"], idd["Significant Digits"], idd["Format"], ird["Mandatory"],
                                     idd["Codelist"], idd["Origin"], idd["Pages"], ird["Method"],
                                     idd["Predecessor"], idd["Comment"]])

    def _load_item_ref(self, ir):
        """ return the ItemRef values used in a row, keyed by column name """
        ird = {}
        ird["Order"] = ir.OrderNumber
        ird["Mandatory"] = ir.Mandatory
        ird["Method"] = ir.MethodOID
        ird["Where Clause"] = self._get_where_clause_oid(ir)
        return ird

    def _load_item_def(self, item_oid):
        """ look up the ItemDef with the given OID and return its values keyed by column name """
        idd = {}
        it = self.mdv.find("ItemDef", "OID", item_oid)
        origin = it.Origin
        idd["Variable"] = it.Name
        idd["Data Type"] = it.DataType
        idd["Length"] = it.Length
        idd["Significant Digits"] = it.SignificantDigits
        idd["Format"] = it.DisplayFormat
        idd["Codelist"] = it.CodeListRef.CodeListOID if it.CodeListRef is not None else ""
        idd["Origin"] = origin.Type if origin else ""
        idd["Pages"] = ""
        idd["Predecessor"] = ""
        # an ItemDef without an Origin leaves Pages and Predecessor empty instead of failing
        if origin is not None:
            doc_refs = origin.DocumentRef
            if doc_refs and doc_refs[0].PDFPageRef:
                idd["Pages"] = doc_refs[0].PDFPageRef[0].PageRefs
            if origin.Type == "Predecessor":
                idd["Predecessor"] = origin.Description.TranslatedText[0]._content
        idd["Comment"] = it.CommentOID if it.CommentOID else ""
        return idd

    def _get_where_clause_oid(self, item):
        """ return the WhereClauseOIDs referenced by the ItemRef joined into one cell value """
        # NOTE(review): multiple OIDs are separated by an apostrophe - confirm the consumer expects that
        return "'".join([wc.WhereClauseOID for wc in item.WhereClauseRef])
--------------------------------------------------------------------------------
/define2xls/variables.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class Variables:
    """ write the variable metadata (ItemGroupDef ItemRefs and their ItemDefs) to variables.csv """
    HEADERS = ["Order", "Dataset", "Variable", "Label", "Data Type", "Length", "Significant Digits", "Format",
               "KeySequence", "Mandatory", "CodeList", "Valuelist", "Origin", "Pages", "Method", "Predecessor",
               "Role", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose ItemGroupDef entries are exported
        :param data_path: str - directory the variables.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "variables.csv")

    def extract(self):
        """ write the header row plus one row per ItemRef of every ItemGroupDef to self.file_name """
        # written as UTF-8 because ExcelDefineFile reads the CSV files back with encoding='utf8'
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for ig in self.mdv.ItemGroupDef:
                for ir in ig.ItemRef:
                    # assumes all ItemDefs are referenced by an ItemRef
                    ird = self._load_item_ref(ir)
                    idd = self._load_item_def(ir.ItemOID)
                    writer.writerow([ird["Order"], ig.Name, idd["Variable"], idd["Label"], idd["Data Type"],
                                     idd["Length"], idd["Significant Digits"], idd["Format"], ird["KeySequence"],
                                     ird["Mandatory"], idd["Codelist"], idd["Valuelist"], idd["Origin"],
                                     idd["Pages"], ird["Method"], idd["Predecessor"], ird["Role"], idd["Comment"]])

    def _load_item_ref(self, ir):
        """ return the ItemRef values used in a row, keyed by column name """
        ird = {}
        ird["Order"] = ir.OrderNumber
        ird["Mandatory"] = ir.Mandatory
        ird["KeySequence"] = ir.KeySequence
        ird["Method"] = ir.MethodOID
        ird["Role"] = ir.Role
        return ird

    def _load_item_def(self, item_oid):
        """ look up the ItemDef with the given OID and return its values keyed by column name """
        idd = {}
        it = self.mdv.find("ItemDef", "OID", item_oid)
        origin = it.Origin
        idd["Variable"] = it.Name
        idd["Data Type"] = it.DataType
        idd["Length"] = it.Length
        idd["Significant Digits"] = it.SignificantDigits
        idd["Format"] = it.DisplayFormat
        # collapse line breaks and runs of whitespace in the label into single spaces
        idd["Label"] = " ".join(it.Description.TranslatedText[0]._content.split())
        idd["Codelist"] = it.CodeListRef.CodeListOID if it.CodeListRef is not None else ""
        idd["Valuelist"] = it.ValueListRef.ValueListOID if it.ValueListRef else ""
        idd["Origin"] = origin.Type if origin else ""
        idd["Pages"] = ""
        idd["Predecessor"] = ""
        # an ItemDef without an Origin leaves Pages and Predecessor empty instead of failing
        if origin is not None:
            doc_refs = origin.DocumentRef
            if doc_refs and doc_refs[0].PDFPageRef:
                idd["Pages"] = doc_refs[0].PDFPageRef[0].PageRefs
            if origin.Type == "Predecessor":
                idd["Predecessor"] = origin.Description.TranslatedText[0]._content
        idd["Comment"] = it.CommentOID if it.CommentOID else ""
        return idd
55 |
--------------------------------------------------------------------------------
/define2xls/where_clauses.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 |
4 |
class WhereClauses:
    """ write the where clause (WhereClauseDef) metadata from a MetaDataVersion to whereclauses.csv """
    HEADERS = ["OID", "Dataset", "Variable", "Comparator", "Value", "Comment"]

    def __init__(self, odmlib_mdv, data_path):
        """
        :param odmlib_mdv: MetaDataVersion object whose WhereClauseDef entries are exported
        :param data_path: str - directory the whereclauses.csv file is written to
        """
        self.mdv = odmlib_mdv
        self.path = data_path
        self.file_name = os.path.join(self.path, "whereclauses.csv")

    def extract(self):
        """ write the header row plus one row per RangeCheck of every WhereClauseDef to self.file_name """
        # written as UTF-8 because ExcelDefineFile reads the CSV files back with encoding='utf8'
        with open(self.file_name, 'w', newline='', encoding='utf8') as f:
            writer = csv.writer(f, dialect="excel")
            writer.writerow(self.HEADERS)
            for wc in self.mdv.WhereClauseDef:
                # using OID to get dataset is a hack, but dataset column only used to create the OID for def:ItemOID
                dataset = wc.OID.split(".")[1]
                comment_oid = wc.CommentOID if wc.CommentOID else ""
                for rc in wc.RangeCheck:
                    value = self._load_check_values(rc)
                    variable_name = self._extract_variable_name(rc.ItemOID)
                    writer.writerow([wc.OID, dataset, variable_name, rc.Comparator, value, comment_oid])

    def _load_check_values(self, rc):
        """ return the content of all CheckValue entries of a RangeCheck as one comma-separated string """
        return ",".join([cv._content for cv in rc.CheckValue])

    def _extract_variable_name(self, item_oid):
        """ return the last dot-separated segment of the ItemOID, used as the variable name """
        return item_oid.split(".")[-1]
--------------------------------------------------------------------------------
/get_started/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/get_started/.idea/get_started.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/get_started/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/get_started/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/get_started/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/get_started/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/get_started/README.md:
--------------------------------------------------------------------------------
1 | # odmlib examples
2 |
3 | ## Introduction
4 | The odmlib examples are small applications that demonstrate the use of the odmlib Python package for creating
5 | and processing ODM files, including extensions like Define-XML. The examples are intended to make it easier
6 | to get started using the odmlib package.
7 |
8 | The odmlib package simplifies working with the CDISC ODM data exchange standard and its extensions, such as
9 | Define-XML, in Python. The odmlib package provides an object-oriented interface to working with ODM documents
10 | that simplifies creating and processing them.
11 |
12 | ## Why odmlib?
13 | The odmlib package satisfies my personal interest in working with ODM using an object-oriented
14 | interface in Python.
15 |
16 | ## Getting Started
17 | See the [odmlib repository](https://github.com/swhume/odmlib) to get the odmlib package. Eventually, it may
18 | make its way into PyPi, but for now you'll need to install from the source. The odmlib README provides
19 | instructions for getting started.
20 |
21 | ## Limitations
22 | The odmlib examples are simple programs intended to demonstrate some of the basic capabilities of odmlib.
23 | The examples are not complete, production ready applications.
24 |
25 | The odmlib package is still in development. Although odmlib supports all of ODM more work remains
26 | to complete all features for processing ClinicalData. The initial focus has been on getting
27 | the metadata sections complete.
--------------------------------------------------------------------------------
/get_started/data/odm_demo.xml:
--------------------------------------------------------------------------------
1 |
2 | Get Started with ODM XMLDemo to get started with odmlibODM XML Get StartedGet Started ProtocolDate of measurementsDateResult of the vital signs measurement as originally received or collected.DiastolicNoYesConcatenation of BRTHYR, BRTHMO, and BRTHDY in ISO 8601 format
--------------------------------------------------------------------------------
/get_started/requirements.txt:
--------------------------------------------------------------------------------
1 | pip>=20.3.3
2 | xmlschema>=1.10.0
3 | validators>=0.18.2
4 | elementpath>=2.5.0
5 | setuptools>=51.1.2
6 | six>=1.15.0
7 | decorator>=4.4.2
8 | odmlib>=0.1.4
--------------------------------------------------------------------------------
/library_xml/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/library_xml/.idea/.name:
--------------------------------------------------------------------------------
1 | library_1_0
--------------------------------------------------------------------------------
/library_xml/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/library_xml/.idea/library_1_0.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/library_xml/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/library_xml/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/library_xml/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/library_xml/README.md:
--------------------------------------------------------------------------------
1 | # library_xml
2 |
3 | ## Introduction
4 | library_xml retrieves a standard from the CDISC Library as Library-XML (an ODM media type) and loads it into odmlib
5 | for processing. This example program includes the models for Library-XML based on ODM (e.g. CDASHIG) and Define-XML
6 | (e.g. SDTMIG). Library-XML shows how to use external odmlib models (ODM extensions) and can be used as a method for
7 | implementing other extensions. That is, this example includes the Library-XML models to be used with odmlib.
8 | Library-XML also could be enhanced to process standards retrieved from the CDISC Library using the odm+xml media type.
9 |
10 | This example application uses the CDISC Library API. In order to run the application you will need to create an
11 | account and use your own API key.
12 |
13 | ## Getting Started
14 | To run library-xml.py from the command-line to retrieve SDTMIG v3.4:
15 |
16 | `python library-xml.py -d -e "/mdr/sdtmig/3-4" -k e5a7d2b9bg1a4066ae4b25133a091574`
17 |
18 | The -d indicates that the standard retrieved uses the Define-XML model for Library-XML. You will need to replace the
19 | -k value with your own CDISC Library API Key. The endpoint to retrieve is specified in the -e value. Since no -f was
20 | provided, the default output filename is used.
21 |
22 | Or, to run it to retrieve CDASHIG v2.2:
23 |
24 | `python library-xml.py -e "/mdr/cdashig/2-2" -f library-odmlib-cdashig2-2.json -k e5a7d2b9bg1a4066ae4b25133a091574`
25 |
26 | In this example, the output filename was specified in the value of -f.
27 |
28 | The odmlib package must be installed to run library-xml. See the
29 | [odmlib repository](https://github.com/swhume/odmlib) to install the odmlib source code and latest features.
30 | The odmlib package can also be installed from PyPi with the understanding that it is still in development
31 | so might not have everything available in the odmlib repository. It can be installed from PyPi using:
32 |
33 | `pip install odmlib`
34 |
35 | The odmlib README provides instructions for getting started.
36 |
37 | ## Limitations
38 | The odmlib examples are basic programs intended to introduce programmers to the basic capabilities of odmlib.
39 | The examples are not complete, production ready applications. However, I'm happy to update these applications
40 | to accommodate new features or bug fixes and will also review pull requests.
41 |
42 | The Library-XML program demonstrates the use of the Library-XML extension and could be enhanced to address real
43 | use cases.
44 |
45 | To use the CDISC Library API you will need to create an account and use your credentials to authenticate.
46 |
47 | The odmlib package is still in development.
--------------------------------------------------------------------------------
/library_xml/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/library_xml/__init__.py
--------------------------------------------------------------------------------
/library_xml/library_define_1_0/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/library_xml/library_define_1_0/__init__.py
--------------------------------------------------------------------------------
/library_xml/library_odm_1_0/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/library_xml/library_odm_1_0/__init__.py
--------------------------------------------------------------------------------
/library_xml/library_xml.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import os
3 | import argparse
4 | import odmlib.ns_registry as NS
5 | import odmlib.define_loader as DL
6 | import odmlib.odm_loader as OL
7 |
8 | """
9 | Example Cmd-line Args:
10 | SDTMIG v3.4: -d -e "/mdr/sdtmig/3-4" -k e5a7d2b9bg1a4066ae4b25133a091574
11 | CDASHIG v2.2: -e "/mdr/cdashig/2-2" -f library-odmlib-cdashig2-2.json -k e5a7d2b9bg1a4066ae4b25133a091574
12 | NOTE: you will need to replace the -k arg with your own CDISC Library API key
13 | """
14 |
15 |
def write_odm_as_json(odm, filename):
    """
    announce what is being saved, then write the ODM object to a file as JSON
    :param odm: loaded ODM object; its to_json() output is written unchanged
    :param filename: str - path and name of the JSON file to write
    """
    study_name = odm.Study[0].GlobalVariables.StudyName
    library_version = odm.LibraryXMLVersion
    print(f"Saving {study_name} in Library-XML version {library_version} as JSON")
    with open(filename, 'w') as json_file:
        json_file.write(odm.to_json())
20 |
21 |
def load_odmlib(endpoint, filename, model_package, ns, api_key):
    """
    request a standard from the CDISC Library as odm+xml, load it into odmlib, and save it as JSON
    :param endpoint: str - CDISC Library API endpoint, appended to the base URL
    :param filename: str - path and name of the JSON file to write
    :param model_package: str - name of the local model package; a name containing "define" selects the Define loader
    :param ns: namespace registry passed to the loader when the document is created
    :param api_key: str - the CDISC Library API key sent in the api-key header
    """
    base_url = "https://library.cdisc.org/api"
    headers = {"Accept": "application/odm+xml", "User-Agent": "crawler", "api-key": api_key}
    r = requests.get(base_url + endpoint, headers=headers)
    if r.status_code == 200:
        if "define" in model_package:
            loader = DL.XMLDefineLoader(model_package=model_package, ns_uri="http://www.cdisc.org/ns/library-xml/v1.0", local_model=True)
        else:
            loader = OL.XMLODMLoader(model_package=model_package, ns_uri="http://www.cdisc.org/ns/library-xml/v1.0", local_model=True)
        loader.create_document_from_string(r.text, ns)
        odm = loader.load_odm()
        write_odm_as_json(odm, filename)
    elif r.status_code == 406:
        # status_code is an int, so it must be compared with 406 rather than the string "406"
        print(f"{endpoint} is not available from CDISC Library as odm+xml")
    else:
        print(f"HTTPError {r.status_code} for url {base_url + endpoint}")
39 |
40 |
def set_cmd_line_args():
    """
    define and parse the command-line arguments for retrieving Library-XML content
    :return: the argparse namespace with file_out, endpoint, api_key and is_define
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-f", "--file", dest="file_out", required=False, default="library-odmlib.json",
        help="json file name to write output")
    cli.add_argument(
        "-e", "--endpoint", dest="endpoint", required=True,
        help="CDISC Library API endpoint to retrieve")
    cli.add_argument(
        "-k", "--apikey", dest="api_key", required=True,
        help="the CDISC Library API Key")
    # nargs='?' with const=True: a bare -d yields True, and -d VALUE is still accepted
    cli.add_argument(
        "-d", "--define", dest="is_define", nargs='?', const=True, default=False,
        help="is the Library-XML content in Define-XML?")
    return cli.parse_args()
52 |
53 |
if __name__ == '__main__':
    # script entry point: pick the odmlib model, register namespaces, then fetch and convert
    args = set_cmd_line_args()
    model_package = "library_odm_1_0"
    if args.is_define:
        # Define-XML based content also needs the def namespace registered before the mdr one
        model_package = "library_define_1_0"
        NS.NamespaceRegistry(prefix="def", uri="http://www.cdisc.org/ns/def/v2.1")
    ns = NS.NamespaceRegistry(prefix="mdr", uri="http://www.cdisc.org/ns/library-xml/v1.0")

    print(f"Requesting {args.endpoint} from the CDISC Library...")
    # the output file is written to the data directory located next to this script
    script_dir = os.path.dirname(os.path.realpath(__file__))
    odmlib_json_file = os.path.join(script_dir, 'data', args.file_out)
    load_odmlib(args.endpoint, odmlib_json_file, model_package, ns, args.api_key)
66 |
--------------------------------------------------------------------------------
/library_xml/requirements.txt:
--------------------------------------------------------------------------------
1 | requests>=2.26.0
2 | odmlib>=0.1.4
3 |
--------------------------------------------------------------------------------
/library_xml/tests/test_local_library_loader.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import odmlib.ns_registry as NS
3 | import odmlib.define_loader as DL
4 | import odmlib.odm_loader as OL
5 | from odmlib.define_2_1.rules import oid_ref as OID
6 |
7 |
class TestLocalLibraryLoader(unittest.TestCase):
    """ load locally stored Library-XML files with odmlib and spot-check the loaded content """

    def _load_sdtmig(self):
        """ register the def and mdr namespaces and load the SDTMIG v3.4 Library-XML file """
        NS.NamespaceRegistry(prefix="def", uri="http://www.cdisc.org/ns/def/v2.1")
        ns = NS.NamespaceRegistry(prefix="mdr", uri="http://www.cdisc.org/ns/library-xml/v1.0")
        with open("../data/library-sdtm-3-4.xml", "r", encoding="utf-8") as f:
            odm_string = f.read()
        loader = DL.XMLDefineLoader(
            model_package="library_define_1_0",
            ns_uri="http://www.cdisc.org/ns/library-xml/v1.0",
            local_model=True,
        )
        loader.create_document_from_string(odm_string, ns)
        return loader.load_odm()

    def test_odmlib_sdtmig(self):
        """ the SDTMIG file loads as a Define based model with the expected attribute values """
        odm = self._load_sdtmig()
        self.assertEqual(odm.FileOID, "ODM.SDTMIGv3.4.2021-11-29")
        self.assertEqual(odm.LibraryXMLVersion, "1.0.0")
        self.assertEqual(odm.Context, "Other")
        mdv = odm.Study[0].MetaDataVersion
        self.assertEqual(mdv.DatePublished, "2021-11-29")
        self.assertEqual(mdv.Standards[0].Name, "SDTMIG v3.4")
        self.assertEqual(mdv.ItemGroupDef[1].OID, "IGD.CM")
        self.assertEqual(mdv.ItemGroupDef[1].Class.Name, "INTERVENTIONS")
        first_note = mdv.ItemDef[0].CDISCNotes.TranslatedText[0]
        self.assertEqual(first_note._content, "Unique identifier for a study.")
        self.assertEqual(first_note.lang, "en")
        self.assertEqual(mdv.ItemDef[1].SubmissionDataType, "Char")
        dsdecod = mdv.find("ItemDef", "OID", "IT.DS.DSDECOD")
        self.assertEqual(dsdecod.Name, "DSDECOD")
        expected_codelists = ("CL.C114118", "CL.C150811")
        self.assertEqual(dsdecod.AltCodeListRef[0].CodeListOID, expected_codelists[0])
        self.assertEqual(dsdecod.AltCodeListRef[1].CodeListOID, expected_codelists[1])

    def test_odmlib_cdashig(self):
        """ the CDASHIG file loads as an ODM based model with the expected attribute values """
        NS.NamespaceRegistry(prefix="odm", uri="http://www.cdisc.org/ns/odm/v1.3", is_default=True)
        ns = NS.NamespaceRegistry(prefix="mdr", uri="http://www.cdisc.org/ns/library-xml/v1.0")
        with open("../data/library-cdash-2-2.xml", "r", encoding="utf-8") as f:
            odm_string = f.read()
        loader = OL.XMLODMLoader(
            model_package="library_odm_1_0",
            ns_uri="http://www.cdisc.org/ns/library-xml/v1.0",
            local_model=True,
        )
        loader.create_document_from_string(odm_string, ns)
        odm = loader.load_odm()
        self.assertEqual(odm.FileOID, "ODM.CDASHIGv2.2.2021-09-28")
        self.assertEqual(odm.LibraryXMLVersion, "1.0.0")
        mdv = odm.Study[0].MetaDataVersion
        self.assertEqual(mdv.DatePublished, "2021-09-28")
        self.assertEqual(mdv.ItemGroupDef[1].OID, "IGD.CM")
        self.assertEqual(mdv.ItemGroupDef[1].Class.Name, "INTERVENTIONS")
        # newlines are dropped and runs of whitespace collapsed before comparing the text
        raw_instructions = mdv.ItemDef[0].MappingInstructions.TranslatedText[0]._content.replace("\n", "")
        normalized = " ".join(raw_instructions.split())
        self.assertEqual(normalized, "Maps directly to the SDTMIG variable listed in the SDTMIG Target column.")
        self.assertEqual(mdv.ItemDef[0].Definition.TranslatedText[0]._content, "A unique identifier for a study.")
        self.assertEqual(mdv.ItemDef[1].SubmissionDataType, "Char")
        agscat = mdv.find("ItemDef", "OID", "IT.AG.AGSCAT")
        self.assertEqual(agscat.Name, "AGSCAT")
        self.assertEqual(agscat.Core, "O")

    def test_oid_checks(self):
        """ with the default checker the only unreferenced OID reported is the Standard OID """
        odm = self._load_sdtmig()
        oid_checker = OID.OIDRef()
        odm.verify_oids(oid_checker)
        self.assertTrue(oid_checker.check_oid_refs())
        orphans = oid_checker.check_unreferenced_oids()
        self.assertDictEqual(orphans, {'STD.SDTMIGv3.4': 'StandardOID'})

    def test_oid_checks_skip(self):
        """ skipping StandardOID and Standard leaves no unreferenced OIDs """
        odm = self._load_sdtmig()
        oid_checker = OID.OIDRef(skip_attrs=["StandardOID"], skip_elems=["Standard"])
        odm.verify_oids(oid_checker)
        self.assertTrue(oid_checker.check_oid_refs())
        orphans = oid_checker.check_unreferenced_oids()
        print(f"Orphans: {orphans}")
        self.assertDictEqual(orphans, {})
88 |
# run the test cases in this module when the file is executed directly
if __name__ == '__main__':
    unittest.main()
91 |
--------------------------------------------------------------------------------
/merge_odm/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /../../../../../../../:\Users\shume\Dropbox\odmlib_examples\odm_merge\.idea/dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/merge_odm/.idea/.name:
--------------------------------------------------------------------------------
1 | merge_odm
--------------------------------------------------------------------------------
/merge_odm/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/merge_odm/.idea/merge_odm.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/merge_odm/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/merge_odm/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/merge_odm/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/merge_odm/README.md:
--------------------------------------------------------------------------------
1 | # merge_odm
2 |
3 | ## Introduction
4 | The merge_odm program is an odmlib example application that generates a target ODM file with a CRF
5 | moved from a source ODM file. The merge_odm application merges a form from a source ODM file, such as one
6 | used as a CRF library, into another, target ODM file. This example demonstrates some basic odmlib
7 | features.
8 |
9 | ## Getting Started
10 | To run merge_odm.py from the command-line: `python merge_odm.py`
11 |
12 | The application expects a source and target xml file in a data directory that exists in the same path as the
13 | merge_odm.py application.
14 |
15 | The odmlib package must be installed to run merge_odm. See the [odmlib repository](https://github.com/swhume/odmlib)
16 | to get the odmlib package. Eventually, it may make its way into PyPi, but for now you'll need to install from the
17 | source. The odmlib README provides instructions for getting started.
18 |
19 | ## Limitations
20 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib.
21 | The examples are not complete, production ready applications.
22 |
23 | The odmlib package is still in development. Although odmlib supports all of ODM, more work remains
24 | to complete all features for processing ClinicalData. The initial focus has been on getting
25 | the metadata sections complete.
--------------------------------------------------------------------------------
/merge_odm/merge_odm.py:
--------------------------------------------------------------------------------
1 | import odmlib.odm_loader as OL
2 | import odmlib.loader as LD
3 | import os
4 |
5 | # An odmlib example application
6 |
# Default input files, both located in the "data" directory next to this script.
# SOURCE is the ODM file the form is read from.
SOURCE = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'cdash-odm-source.xml')
# TARGET is the ODM file the form is merged into; MergeODM writes the merged result back to this path.
TARGET = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'cdash-odm-target.xml')
9 |
10 |
class MergeODM:
    """ merges one form, and the definitions it references, from a source ODM file into a target ODM file """

    def __init__(self, source_file, target_file, form_oid):
        """
        simple merge application that generates a target ODM file with a CRF moved from a source ODM file
        :param source_file: path of the ODM file that contains the form
        :param target_file: path of the ODM file that receives the form; it is overwritten by merge()
        :param form_oid: OID of the FormDef to merge
        """
        self.source_file = source_file
        self.target_file = target_file
        self.form_oid = form_oid

    def merge(self):
        """ load both documents, merge the form into the target's first MetaDataVersion and save the target """
        source_loader = LD.ODMLoader(OL.XMLODMLoader())
        source_loader.open_odm_document(self.source_file)
        source_mdv = source_loader.MetaDataVersion()
        target_loader = LD.ODMLoader(OL.XMLODMLoader())
        target_loader.open_odm_document(self.target_file)
        target_root = target_loader.root()
        target_mdv = target_root.Study[0].MetaDataVersion[0]
        self._merge_form_def(source_mdv, target_mdv)
        self._write_target_odm(target_root)

    def _copy_if_missing(self, source_mdv, target_mdv, element_type, oid):
        """ find the element in the source, append it to the target unless the OID is already there, return it """
        element = source_mdv.find(element_type, "OID", oid)
        if self._element_does_not_exist(target_mdv, element.OID, element_type):
            getattr(target_mdv, element_type).append(element)
        return element

    def _merge_form_def(self, source_mdv, target_mdv):
        """ merge the FormDef identified by form_oid and then the item groups it references """
        form = self._copy_if_missing(source_mdv, target_mdv, "FormDef", self.form_oid)
        self._merge_item_group_def(source_mdv, target_mdv, form)

    def _merge_item_group_def(self, source_mdv, target_mdv, form):
        """ merge every ItemGroupDef referenced by the form and then the items of each group """
        for group_ref in form.ItemGroupRef:
            group = self._copy_if_missing(source_mdv, target_mdv, "ItemGroupDef", group_ref.ItemGroupOID)
            self._merge_items(source_mdv, target_mdv, group)

    def _merge_items(self, source_mdv, target_mdv, igd):
        """ merge every ItemDef referenced by the item group along with its method and code list """
        for item_ref in igd.ItemRef:
            item_def = self._copy_if_missing(source_mdv, target_mdv, "ItemDef", item_ref.ItemOID)
            # methods and code lists are processed even when the ItemDef was already in the target
            self._merge_method(source_mdv, target_mdv, item_ref)
            self._merge_codelist(source_mdv, target_mdv, item_def)

    def _merge_method(self, source_mdv, target_mdv, itr):
        """ merge the MethodDef referenced by the ItemRef, when it references one """
        if not itr.MethodOID:
            return
        self._copy_if_missing(source_mdv, target_mdv, "MethodDef", itr.MethodOID)

    def _merge_codelist(self, source_mdv, target_mdv, item):
        """ merge the CodeList referenced by the ItemDef, when it references one """
        if not item.CodeListRef:
            return
        self._copy_if_missing(source_mdv, target_mdv, "CodeList", item.CodeListRef.CodeListOID)

    def _write_target_odm(self, target_root):
        """ write the updated target document back to the target file """
        target_root.write_xml(self.target_file)

    def _element_does_not_exist(self, mdv, oid, element_type):
        """ return True when the MetaDataVersion has no element of the given type with that OID """
        return not mdv.find(element_type, "OID", oid)
69 |
70 |
def main():
    """ main driver method that merges the "ODM.F.VS" form in the source ODM file into the target ODM file """
    merger = MergeODM(SOURCE, TARGET, "ODM.F.VS")
    merger.merge()


# run the merge only when the file is executed as a script
if __name__ == "__main__":
    main()
79 |
--------------------------------------------------------------------------------
/merge_odm/requirements.txt:
--------------------------------------------------------------------------------
1 | odmlib>=0.1.4
--------------------------------------------------------------------------------
/notebooks/data/cosa_define_demo.xml:
--------------------------------------------------------------------------------
1 |
2 | TEST Define-XML ItemGroupDefItemGroupDef 001Define-XML ItemGroupDefVital Signs
--------------------------------------------------------------------------------
/notebooks/data/cosa_demo.xml:
--------------------------------------------------------------------------------
1 |
2 | COSA Demo StudyDemo to get started with odmlibCOSA odmlib Demonstration
--------------------------------------------------------------------------------
/notebooks/generate_define.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "pycharm": {
7 | "name": "#%% md\n"
8 | }
9 | },
10 | "source": [
11 | "## Generating a Define-XML v2.1 File\n",
12 | "\n",
13 | "This notebook demonstrates how to run the odmlib example program xlsx2define2-1.py. This example program takes a\n",
14 | "spreadsheet containing Define-XML v2.1 metadata and generates a Define-XML file. The program runs on the command-line\n",
15 | "and provides a number of options.\n",
16 | "\n",
17 | "A number of odmlib example programs, including xlsx2define2-1.py, are located in GitHub at\n",
18 | "https://github.com/swhume/odmlib_examples\n",
19 | "\n",
20 | "The example programs are open-source using the MIT license.\n",
21 | "\n",
22 | "### Install odmlib and other libraries\n",
23 | "Before you can run the odmlib examples, if you haven't already installed odmlib you'll need to do that.\n",
24 | "```\n",
25 | "pip install odmlib\n",
26 | "```\n",
27 | "The xlsx2define2-1.py application also requires xmlschema and openpyxl to be installed using pip in the same\n",
28 | "way that odmlib was installed above.\n",
29 | "\n",
30 | "### Use the example metadata spreadsheet\n",
31 | "\n",
32 | "The first time you run this use the provided example metadata spreadsheet to test it. The\n",
33 | "odmlib-define-metadata.xlsx spreadsheet contains the metadata to generate a Define-XML v2.1 file.\n",
34 | "\n",
35 | "### Run the program to generate the Define-XML file\n",
36 | "\n",
37 | "This example program runs using a command-line interface. We'll use the following options when running\n",
38 | "the application:\n",
39 | "* -e provides the name and location of the spreadsheet file to use as an input.\n",
40 | "* -d provides the name and location of the Define-XML file to generate as output.\n",
41 | "\n",
42 | "Now, let's run the program.\n",
43 | "\n",
44 | "```\n",
45 | "python xlsx2define2-1.py -e ./data/odmlib-define-metadata.xlsx -d ./data/odmlib-define.xml\n",
46 | "```"
47 | ]
48 | }
49 | ],
50 | "metadata": {
51 | "kernelspec": {
52 | "display_name": "Python 3 (ipykernel)",
53 | "language": "python",
54 | "name": "python3"
55 | },
56 | "language_info": {
57 | "codemirror_mode": {
58 | "name": "ipython",
59 | "version": 3
60 | },
61 | "file_extension": ".py",
62 | "mimetype": "text/x-python",
63 | "name": "python",
64 | "nbconvert_exporter": "python",
65 | "pygments_lexer": "ipython3",
66 | "version": "3.9.5"
67 | }
68 | },
69 | "nbformat": 4,
70 | "nbformat_minor": 1
71 | }
--------------------------------------------------------------------------------
/snippets/data/simple_create.xml:
--------------------------------------------------------------------------------
1 |
2 | Get Started with ODM XMLDemo to get started with odmlibODM XML Get StartedGet Started ProtocolYear of the subject's birthBirth Year
--------------------------------------------------------------------------------
/snippets/odmlib_first_define.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Sam Hume. Licensed under the MIT-0 license https://opensource.org/licenses/MIT-0
2 | import odmlib.define_2_1.model as DEFINE
3 | import datetime
4 |
5 | """
6 | This is the code presented at the PHUSE US Connect 2022 and described in paper PAP_OS01.
7 | The purpose of this code is to demonstrate using odmlib to create and process a very simple Define-XML v2.1 file.
8 | NOTE: In places where paths are referenced, you will need to update them to reflect your system.
9 | """
10 |
# One timezone-aware UTC timestamp is used for both AsOfDateTime and CreationDateTime.
# datetime.now(timezone.utc) gives the same ISO 8601 text as utcnow().replace(tzinfo=utc) without
# relying on utcnow(), which is deprecated in recent Python versions.
current_datetime = datetime.datetime.now(datetime.timezone.utc).isoformat()
odm = DEFINE.ODM(FileOID="DEF.COSA.DEMO",
                 AsOfDateTime=current_datetime,
                 CreationDateTime=current_datetime,
                 ODMVersion="1.3.2",
                 FileType="Snapshot",
                 Originator="Sam Hume",
                 SourceSystem="odmlib",
                 SourceSystemVersion="0.1.4",
                 Context="Other")

# create the Study with its global variables and attach it to the ODM root
study = DEFINE.Study(OID="ST.DEFINE.COSA.001")
study.GlobalVariables.StudyName = DEFINE.StudyName(_content="TEST Define-XML ItemGroupDef")
study.GlobalVariables.StudyDescription = DEFINE.StudyDescription(_content="ItemGroupDef 001")
study.GlobalVariables.ProtocolName = DEFINE.ProtocolName(_content="Define-XML ItemGroupDef")
odm.Study = study

# create the MetaDataVersion and the Standards it declares
mdv = DEFINE.MetaDataVersion(OID="MDV.COSA.IGD.001", Name="ItemGroupDefDemo001",
                             Description="ItemGroupDef COSA Demo", DefineVersion="2.1.0")

mdv.Standards.Standard.append(DEFINE.Standard(OID="STD.1", Name="SDTMIG", Type="IG", Version="3.2", Status="Final"))
mdv.Standards.Standard.append(DEFINE.Standard(OID="STD.2", Name="CDISC/NCI", Type="CT", PublishingSet="SDTM",
                                              Version="2021-12-17", Status="Final"))

# create the VS ItemGroupDef; StandardOID refers to the "STD.1" Standard added above
igd = DEFINE.ItemGroupDef(OID="IG.VS",
                          Name="VS",
                          Repeating="Yes",
                          Domain="VS",
                          SASDatasetName="VS",
                          IsReferenceData="No",
                          Purpose="Tabulation",
                          ArchiveLocationID="LF.VS",
                          Structure="One record per vital sign measurement per visit per subject",
                          StandardOID="STD.1",
                          IsNonStandard="Yes",
                          HasNoData="Yes")

igd.Description.TranslatedText.append(DEFINE.TranslatedText(_content="Vital Signs", lang="en"))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.STUDYID", Mandatory="Yes", OrderNumber=1, KeySequence=1))
# NOTE(review): "DOMIAN" looks like a typo for "DOMAIN" - confirm before changing the OID
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.VS.DOMIAN", Mandatory="Yes", OrderNumber=2))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.USUBJID", Mandatory="Yes", OrderNumber=3, KeySequence=2))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.VS.VSSEQ", Mandatory="Yes", OrderNumber=4))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.VS.VSTESTCD", Mandatory="Yes", OrderNumber=5, KeySequence=3))
igd.ItemRef.append(DEFINE.ItemRef(ItemOID="IT.VS.VSTEST", Mandatory="Yes", OrderNumber=6))

# this ItemRef is created without an ItemOID; presumably odmlib rejects it with a ValueError,
# which is reported here instead of stopping the script
try:
    ir = DEFINE.ItemRef(Mandatory="Yes", OrderNumber=1)
except ValueError as ve:
    print(f"Error creating ItemRef: {ve}")

igd.Class = DEFINE.Class(Name="FINDINGS")

# attach the MetaDataVersion to the Study and add the ItemGroupDef to it
odm.Study.MetaDataVersion = mdv
odm.Study.MetaDataVersion.ItemGroupDef.append(igd)

# update the path to reflect your system
odm.write_xml(odm_file="./data/cosa_define_demo.xml")

odm.write_json(odm_file="./data/cosa_define_demo.json")

# read the generated Define-XML back and display it
# update the path to reflect your system
with open("./data/cosa_define_demo.xml", 'r') as file:
    cosa_odm = file.read()
print(cosa_odm)

from odmlib import odm_parser as P
# replace the path below with the path to your Define-XML v2.1 schema
schema_file = "/home/sam/standards/DefineV211/schema/cdisc-define-2.1/define2-1-0.xsd"

# validate the generated file against the Define-XML schema
validator = P.ODMSchemaValidator(schema_file)
try:
    # update the path to reflect your system
    validator.validate_file("./data/cosa_define_demo.xml")
    print("define-XML schema validation completed successfully...")
except P.OdmlibSchemaValidationError as ve:
    print(f"schema validation errors: {ve}")

# load the generated file back into odmlib objects using the Define-XML v2.1 model
from odmlib import define_loader as DL, loader as LD
loader = LD.ODMLoader(DL.XMLDefineLoader(model_package="define_2_1", ns_uri="http://www.cdisc.org/ns/def/v2.1"))
loader.open_odm_document("./data/cosa_define_demo.xml")

odm = loader.load_odm()
print(f"Study OID is {odm.Study.OID}")
print(f"Study Name is {odm.Study.GlobalVariables.StudyName}")
print(f"Study Description is {odm.Study.GlobalVariables.StudyDescription}")
print(f"Protocol Name is {odm.Study.GlobalVariables.ProtocolName}")

# convert the loaded content to a dictionary and to a JSON string
cosa_dict = odm.to_dict()
print(cosa_dict)

cosa_json = odm.to_json()
102 |
--------------------------------------------------------------------------------
/snippets/simple_create_odm.py:
--------------------------------------------------------------------------------
1 | import odmlib.odm_1_3_2.model as ODM
2 | import datetime
3 |
# output file for the generated ODM document; update the path to reflect your system
ODM_FILE = "./data/simple_create.xml"

# One timezone-aware UTC timestamp is used for both AsOfDateTime and CreationDateTime.
# datetime.now(timezone.utc) gives the same ISO 8601 text as utcnow().replace(tzinfo=utc) without
# relying on utcnow(), which is deprecated in recent Python versions.
current_datetime = datetime.datetime.now(datetime.timezone.utc).isoformat()
root = ODM.ODM(FileOID="ODM.DEMO.001", Granularity="Metadata", AsOfDateTime=current_datetime,
               CreationDateTime=current_datetime, ODMVersion="1.3.2", FileType="Snapshot",
               Originator="swhume", SourceSystem="odmlib", SourceSystemVersion="0.1")

# create Study and add to ODM
root.Study.append(ODM.Study(OID="ODM.GET.STARTED"))

# create the global variables
root.Study[0].GlobalVariables = ODM.GlobalVariables()
root.Study[0].GlobalVariables.StudyName = ODM.StudyName(_content="Get Started with ODM XML")
root.Study[0].GlobalVariables.StudyDescription = ODM.StudyDescription(_content="Demo to get started with odmlib")
root.Study[0].GlobalVariables.ProtocolName = ODM.ProtocolName(_content="ODM XML Get Started")

# create the MetaDataVersion
root.Study[0].MetaDataVersion.append(ODM.MetaDataVersion(OID="MDV.DEMO-ODM-01", Name="Get Started MDV",
                                                         Description="Get Started Demo"))
# create Protocol
p = ODM.Protocol()
p.Description = ODM.Description()
p.Description.TranslatedText.append(ODM.TranslatedText(_content="Get Started Protocol", lang="en"))
p.StudyEventRef.append(ODM.StudyEventRef(StudyEventOID="BASELINE", OrderNumber=1, Mandatory="Yes"))
root.Study[0].MetaDataVersion[0].Protocol = p

# create a StudyEventDef
sed = ODM.StudyEventDef(OID="BASELINE", Name="Baseline Visit", Repeating="No", Type="Scheduled")
sed.FormRef.append(ODM.FormRef(FormOID="ODM.F.DM", Mandatory="Yes", OrderNumber=1))
root.Study[0].MetaDataVersion[0].StudyEventDef.append(sed)

# create a FormDef
fd = ODM.FormDef(OID="ODM.F.DM", Name="Demographics", Repeating="No")
fd.ItemGroupRef.append(ODM.ItemGroupRef(ItemGroupOID="ODM.IG.DM", Mandatory="Yes", OrderNumber=2))
# the form belongs in the FormDef list; it was previously appended to ItemGroupDef by mistake
root.Study[0].MetaDataVersion[0].FormDef.append(fd)

# create an ItemGroupDef
igd = ODM.ItemGroupDef(OID="ODM.IG.DM", Name="Demographics", Repeating="No")
igd.ItemRef.append(ODM.ItemRef(ItemOID="ODM.IT.DM.BRTHYR", Mandatory="Yes"))
root.Study[0].MetaDataVersion[0].ItemGroupDef.append(igd)

# create an ItemDef
itd = ODM.ItemDef(OID="ODM.IT.DM.BRTHYR", Name="Birth Year", DataType="integer")
itd.Description = ODM.Description()
itd.Description.TranslatedText.append(ODM.TranslatedText(_content="Year of the subject's birth", lang="en"))
itd.Question = ODM.Question()
itd.Question.TranslatedText.append(ODM.TranslatedText(_content="Birth Year", lang="en"))
itd.Alias.append(ODM.Alias(Context="CDASH", Name="BRTHYR"))
itd.Alias.append(ODM.Alias(Context="SDTM", Name="BRTHDTC"))
root.Study[0].MetaDataVersion[0].ItemDef.append(itd)

# save the new ODM document to a file
root.write_xml(ODM_FILE)
57 |
--------------------------------------------------------------------------------
/snippets/validate_define.py:
--------------------------------------------------------------------------------
1 | from odmlib import odm_parser as P
2 | import odmlib.define_2_1.rules.oid_ref as OID
3 | import odmlib.define_loader as OL
4 | import odmlib.loader as LD
5 | import odmlib.define_2_1.rules.metadata_schema as METADATA
6 | import xmlschema as XSD
7 | import os
8 |
9 | from odmlib.define_2_1.rules import metadata_schema as METADATA
10 | from odmlib.define_2_1.rules import oid_ref as OID
11 |
12 |
# Define-XML file to validate, located in the "data" directory next to this script.
# The commented-out line below is an alternative input file.
# DEF_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'defineV21-SDTM.xml')
DEF_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 't1d-define.xml')
# Absolute path of the Define-XML v2.1 schema under /home/sam; update it to reflect your system.
SCHEMA_FILE = os.path.join(os.sep, 'home', 'sam', 'standards', 'DefineV211', 'schema', 'cdisc-define-2.1', 'define2-1-0.xsd')
16 |
17 |
def validate_odm_xml_file():
    """ validate DEF_FILE against the schema in SCHEMA_FILE and print the outcome """
    schema_checker = P.ODMSchemaValidator(SCHEMA_FILE)
    try:
        schema_checker.validate_file(DEF_FILE)
    except XSD.validators.exceptions.XMLSchemaChildrenValidationError as schema_error:
        print(f"schema validation errors: {schema_error}")
        return
    print("Define-XML schema validation completed successfully...")
26 |
27 |
def load_root():
    """ open DEF_FILE with the Define-XML v2.1 loader and return the loaded root object """
    define_loader = OL.XMLDefineLoader(model_package="define_2_1", ns_uri="http://www.cdisc.org/ns/def/v2.1")
    document = LD.ODMLoader(define_loader)
    document.open_odm_document(DEF_FILE)
    return document.load_odm()
33 |
34 |
def load_mdv():
    """ open DEF_FILE with the Define-XML v2.1 loader and return its MetaDataVersion """
    define_loader = OL.XMLDefineLoader(model_package="define_2_1", ns_uri="http://www.cdisc.org/ns/def/v2.1")
    document = LD.ODMLoader(define_loader)
    document.open_odm_document(DEF_FILE)
    return document.MetaDataVersion()
40 |
def verify_oids(root):
    """
    run the OID checks on the loaded document and print whether they passed
    :param root: loaded odmlib object that provides verify_oids()
    """
    checker = OID.OIDRef()
    try:
        # checks for non-unique OIDs and runs the ref/def check
        root.verify_oids(checker)
    except ValueError as oid_error:
        print(f"Error verifying OIDs: {oid_error}")
        return
    print("OIDs verified as valid")
50 |
51 |
def find_unreferenced_oids(mdv):
    """
    collect the OIDs with the checker and print the ones reported as unreferenced
    :param mdv: loaded odmlib object that provides verify_oids()
    """
    checker = OID.OIDRef()
    mdv.verify_oids(checker)
    orphans = checker.check_unreferenced_oids()
    orphan_count = len(orphans)
    print(f"found {orphan_count} missing OID Defs")
    if not orphans:
        return
    print(f"Orphaned OIDs: {orphans}")
59 |
60 |
def verify_schema_rules(root):
    """
    check the loaded document, converted to a dictionary, against the metadata schema rules
    :param root: loaded odmlib root object; the output of its to_dict() is checked as an "ODM" document
    """
    validator = METADATA.MetadataSchema()
    # the schema checker is called through check_conformance rather than verify_conformance
    # NOTE(review): confirm the method name against the installed odmlib version
    is_valid = validator.check_conformance(root.to_dict(), "ODM")
    if is_valid:
        print("MetaDataVersion conforms to schema rules...")
    else:
        print("Errors found checking the MetaDataVersion against the schema rules...")
68 |
69 |
def verify_element_order(mdv):
    """
    run the element order check on the MetaDataVersion and print whether it passed
    :param mdv: loaded odmlib object that provides verify_order()
    """
    try:
        mdv.verify_order()
    except ValueError as order_error:
        print(f"Error verifying element order in MetaDataVersion: {order_error}")
        return
    print("MetaDataVersion element order is verified")
77 |
78 |
def main():
    """ validate the Define-XML file against the schema, then run the OID and element order checks """
    validate_odm_xml_file()
    # the file is loaded twice: once for the MetaDataVersion and once for the root
    metadata_version = load_mdv()
    odm_root = load_root()
    verify_oids(odm_root)
    find_unreferenced_oids(odm_root)
    verify_element_order(metadata_version)


# run the checks only when the file is executed as a script
if __name__ == "__main__":
    main()
90 |
--------------------------------------------------------------------------------
/snippets/validate_odm_metadata.py:
--------------------------------------------------------------------------------
1 | from odmlib import odm_parser as P
2 | import odmlib.odm_1_3_2.rules.oid_ref as OID
3 | #import cerberus as C
4 | # import odmlib.odm_1_3_2.model as ODM
5 | import odmlib.odm_loader as OL
6 | import odmlib.loader as LD
7 | import odmlib.odm_1_3_2.rules.metadata_schema as METADATA
8 | import xmlschema as XSD
9 | import os
10 |
# ODM file to validate, located in the "data" directory next to this script.
ODM_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'ODM-SnapShot-Export.xml')
# Absolute path of the ODM v1.3.2 schema under /home/sam; update it to reflect your system.
SCHEMA_FILE = os.path.join(os.sep, 'home', 'sam', 'standards', 'odm1-3-2', 'ODM1-3-2.xsd')
13 |
14 |
def validate_odm_xml_file():
    """ validate ODM_FILE against the schema in SCHEMA_FILE and print the outcome """
    schema_checker = P.ODMSchemaValidator(SCHEMA_FILE)
    try:
        schema_checker.validate_file(ODM_FILE)
    except XSD.validators.exceptions.XMLSchemaChildrenValidationError as schema_error:
        print(f"schema validation errors: {schema_error}")
        return
    print("ODM XML schema validation completed successfully...")
23 |
24 |
def load_root():
    """ open ODM_FILE with the ODM v1.3.2 loader and return the loaded root object """
    odm_loader = OL.XMLODMLoader(model_package="odm_1_3_2", ns_uri="http://www.cdisc.org/ns/odm/v1.3")
    document = LD.ODMLoader(odm_loader)
    document.open_odm_document(ODM_FILE)
    return document.load_odm()
30 |
31 |
def load_mdv():
    """ open ODM_FILE with the ODM v1.3.2 loader and return its MetaDataVersion """
    odm_loader = OL.XMLODMLoader(model_package="odm_1_3_2", ns_uri="http://www.cdisc.org/ns/odm/v1.3")
    document = LD.ODMLoader(odm_loader)
    document.open_odm_document(ODM_FILE)
    return document.MetaDataVersion()
37 |
def verify_oids(root):
    """
    run the OID checks on the loaded document and print whether they passed
    :param root: loaded odmlib object that provides verify_oids()
    """
    checker = OID.OIDRef()
    try:
        # checks for non-unique OIDs and runs the ref/def check
        root.verify_oids(checker)
    except ValueError as oid_error:
        print(f"Error verifying OIDs: {oid_error}")
        return
    print("OIDs verified as valid")
47 |
48 |
def find_unreferenced_oids(mdv):
    """
    collect the OIDs with the checker and print the ones reported as unreferenced
    :param mdv: loaded odmlib object that provides verify_oids()
    """
    checker = OID.OIDRef()
    mdv.verify_oids(checker)
    orphans = checker.check_unreferenced_oids()
    orphan_count = len(orphans)
    print(f"found {orphan_count} missing OID Defs")
    if not orphans:
        return
    print(f"Orphaned OIDs: {orphans}")
56 |
57 |
def verify_schema_rules(root):
    """
    check the loaded document, converted to a dictionary, against the metadata schema rules
    :param root: loaded odmlib root object; the output of its to_dict() is checked as an "ODM" document
    """
    schema_rules = METADATA.MetadataSchema()
    document = root.to_dict()
    if schema_rules.check_conformance(document, "ODM"):
        print("MetaDataVersion conforms to schema rules...")
        return
    print("Errors found checking the MetaDataVersion against the schema rules...")
65 |
66 |
def verify_element_order(mdv):
    """
    run the element order check on the MetaDataVersion and print whether it passed
    :param mdv: loaded odmlib object that provides verify_order()
    """
    try:
        mdv.verify_order()
    except ValueError as order_error:
        print(f"Error verifying element order in MetaDataVersion: {order_error}")
        return
    print("MetaDataVersion element order is verified")
74 |
75 |
def main():
    """ validate the ODM file against the schema, then run the OID and element order checks """
    validate_odm_xml_file()
    # the file is loaded twice: once for the MetaDataVersion and once for the root
    metadata_version = load_mdv()
    odm_root = load_root()
    verify_oids(odm_root)
    find_unreferenced_oids(odm_root)
    # TODO schema rules only implemented for metadata at this point
    # verify_schema_rules(root)
    verify_element_order(metadata_version)


# run the checks only when the file is executed as a script
if __name__ == "__main__":
    main()
89 |
--------------------------------------------------------------------------------
/xls2define/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /../../../../../../../:\Users\shume\Dropbox\odmlib_examples\xls2define\.idea/dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/xls2define/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/xls2define/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/xls2define/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/xls2define/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/xls2define/.idea/xls2define.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/xls2define/CodeLists.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class CodeLists(define_object.DefineObject):
    """ create Define-XML v2.0 CodeList element objects from the Codelists worksheet """
    def __init__(self):
        super().__init__()
        self.igd = None

    def create_define_objects(self, sheet, objects, lang):
        """
        parse the Excel sheet and create odmlib CodeList objects to return in the objects dictionary

        Rows are grouped into code lists by the "Name" column: a new CodeList starts whenever the name differs
        from the previous row, so the rows of one code list must be contiguous in the worksheet.
        :param sheet: Excel worksheet object; must provide iter_rows and max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["CodeList"] = []
        cl_c_code = ""
        cl_name = ""
        cl = None
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            # when this is a new code list the names will not be the same
            if row_content["Name"] != cl_name:
                if cl_name:
                    self._add_previous_codelist_to_objects(cl_c_code, cl, objects)
                cl = self._create_codelist_object(row_content)
                cl_c_code = row_content.get("NCI Codelist Code")
                cl_name = row_content.get("Name")
            # a row with a decode becomes a CodeListItem, otherwise it is an EnumeratedItem
            if row_content["Decoded Value"]:
                cl_item = self._create_codelistitem_object(row_content)
                cl.CodeListItem.append(cl_item)
            else:
                en_item = self._create_enumerateditem_object(row_content)
                cl.EnumeratedItem.append(en_item)
        # flush the last code list; skipped for a worksheet without data rows so that None is never stored
        if cl is not None:
            self._add_previous_codelist_to_objects(cl_c_code, cl, objects)

    def _add_previous_codelist_to_objects(self, cl_c_code, cl, objects):
        """
        finish creating a codelist by adding an Alias if a c-code exists and adding the object to the list of codelists
        :param cl_c_code: codelist c-code
        :param cl: odmlib codelist object
        :param objects: dictionary of odmlib objects created from the Excel input file and updated in this method
        """
        # if the code list has an associated c-code add it to the code list as an Alias element
        if cl_c_code:
            alias = DEFINE.Alias(Context="nci:ExtCodeID", Name=cl_c_code)
            cl.Alias.append(alias)
        # add the code list to the list of code list objects
        objects["CodeList"].append(cl)

    def _create_codelist_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib CodeList object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: CodeList odmlib object
        """
        cl = DEFINE.CodeList(OID=row["OID"], Name=row["Name"], DataType=row["Data Type"])
        return cl

    def _create_enumerateditem_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib EnumeratedItem object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: EnumeratedItem odmlib object
        """
        attr = {"CodedValue": row["Term"]}
        if row.get("Order"):
            attr["OrderNumber"] = row["Order"]
        en_item = DEFINE.EnumeratedItem(**attr)
        if row.get("NCI Term Code"):
            alias = DEFINE.Alias(Context="nci:ExtCodeID", Name=row["NCI Term Code"])
            en_item.Alias.append(alias)
        return en_item

    def _create_codelistitem_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib CodeListItem object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: CodeListItem odmlib object
        """
        attr = {"CodedValue": row["Term"]}
        if row.get("Order"):
            attr["OrderNumber"] = row["Order"]
        cl_item = DEFINE.CodeListItem(**attr)
        decode = DEFINE.Decode()
        # use the language handed to create_define_objects rather than a hard-coded "en"
        tt = DEFINE.TranslatedText(_content=row["Decoded Value"], lang=self.lang)
        decode.TranslatedText.append(tt)
        cl_item.Decode = decode
        if row.get("NCI Term Code"):
            alias = DEFINE.Alias(Context="nci:ExtCodeID", Name=row["NCI Term Code"])
            cl_item.Alias.append(alias)
        return cl_item
102 |
--------------------------------------------------------------------------------
/xls2define/Comments.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class Comments(define_object.DefineObject):
    """ build Define-XML v2.0 CommentDef element objects from the Comments worksheet """
    def __init__(self):
        super().__init__()
        self.lookup_oid = None
        self.igd = None

    def create_define_objects(self, sheet, objects, lang):
        """
        turn every data row of the Comments worksheet into a CommentDef stored under objects["CommentDef"]
        :param sheet: Excel worksheet object; must provide iter_rows and max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        last_col = self.sheet.max_column
        header = self.load_header(last_col)
        comment_defs = objects["CommentDef"] = []
        # row 1 holds the column headers, data starts on row 2
        for values in sheet.iter_rows(min_row=2, min_col=1, max_col=last_col, values_only=True):
            comment_defs.append(self._create_commentdef_object(self.load_row(values, header)))

    def _create_commentdef_object(self, row):
        """
        build a CommentDef with its Description and, when a document is given, a DocumentRef
        :param row: Comments worksheet row values as a dictionary
        :return: a CommentDef odmlib object
        """
        comment = DEFINE.CommentDef(OID=row["OID"])
        description = DEFINE.Description()
        description.TranslatedText.append(DEFINE.TranslatedText(_content=row["Description"], lang=self.lang))
        comment.Description = description
        if row.get("Document"):
            self._add_document(row, comment)
        return comment

    def _add_document(self, row, com):
        """
        attach a DocumentRef built from the Comments worksheet row; a PDFPageRef is added only when Pages is set
        :param row: Comments worksheet row values as a dictionary
        :param com: odmlib CommentDef object that gets updated with a DocumentRef object
        """
        doc_ref = DEFINE.DocumentRef(leafID=row["Document"])
        pages = row.get("Pages")
        if pages:
            doc_ref.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=pages, Type="NamedDestination"))
        com.DocumentRef.append(doc_ref)
53 |
--------------------------------------------------------------------------------
/xls2define/Datasets.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class Datasets(define_object.DefineObject):
    """ build Define-XML v2.0 ItemGroupDef element objects from the Datasets worksheet """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang):
        """
        turn every data row of the Datasets worksheet into an ItemGroupDef stored under objects["ItemGroupDef"]
        :param sheet: Excel worksheet object; must provide iter_rows and max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        last_col = self.sheet.max_column
        header = self.load_header(last_col)
        item_groups = objects["ItemGroupDef"] = []
        # row 1 holds the column headers, data starts on row 2
        for values in sheet.iter_rows(min_row=2, min_col=1, max_col=last_col, values_only=True):
            item_groups.append(self._create_itemgroupdef_object(self.load_row(values, header)))

    def _create_itemgroupdef_object(self, row):
        """
        build an ItemGroupDef and its Description from one Datasets worksheet row
        :param row: Datasets worksheet row values as a dictionary
        :return: odmlib ItemGroupDef object
        """
        dataset = row["Dataset"]
        # the dataset name is reused for Name, Domain, SASDatasetName and the archive location id
        attributes = {
            "OID": self.generate_oid(["IG", dataset]),
            "Name": dataset,
            "Repeating": row["Repeating"],
            "Domain": dataset,
            "SASDatasetName": dataset,
            "IsReferenceData": row["Reference Data"],
            "Purpose": row["Purpose"],
            "Class": row["Class"],
            "Structure": row["Structure"],
            "ArchiveLocationID": ".".join(["LF", dataset]),
        }
        comment = row.get("Comment")
        if comment:
            attributes["CommentOID"] = comment
        item_group = DEFINE.ItemGroupDef(**attributes)
        description = DEFINE.Description()
        description.TranslatedText.append(DEFINE.TranslatedText(_content=row["Description"], lang=self.lang))
        item_group.Description = description
        return item_group
45 |
--------------------------------------------------------------------------------
/xls2define/Dictionaries.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class Dictionaries(define_object.DefineObject):
    """ create Define-XML v2.0 CodeList element objects that reference an external dictionary """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang):
        """
        parse the Excel sheet and add one CodeList with an ExternalCodeList per row to objects["CodeList"]

        The code lists are appended to an existing objects["CodeList"] list; the list is created here when no
        earlier worksheet has initialized it, so the result does not depend on worksheet processing order.
        :param sheet: Excel worksheet object; must provide iter_rows and max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        code_lists = objects.setdefault("CodeList", [])
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            cl = self._create_codelist_object(row_content)
            code_lists.append(cl)

    def _create_codelist_object(self, row):
        """
        using the row from the Dictionaries worksheet create an odmlib CodeList object and add ExternalCodeList
        :param row: dictionary with contents from a row in the Dictionaries worksheet
        :return: CodeList odmlib object with ExternalCodeList
        """
        cl = DEFINE.CodeList(OID=row["OID"], Name=row["Name"], DataType=row["Data Type"])
        attr = {"Dictionary": row["Dictionary"]}
        # Version is optional and only set when the worksheet provides a value
        if row.get("Version"):
            attr["Version"] = row["Version"]
        exd = DEFINE.ExternalCodeList(**attr)
        cl.ExternalCodeList = exd
        return cl
38 |
--------------------------------------------------------------------------------
/xls2define/Documents.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class Documents(define_object.DefineObject):
    """ build Define-XML v2.0 leaf element objects from the Documents worksheet """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang):
        """
        turn every data row of the Documents worksheet into a leaf stored under objects["leaf"]
        :param sheet: Excel worksheet object; must provide iter_rows and max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        last_col = self.sheet.max_column
        header = self.load_header(last_col)
        leaves = objects["leaf"] = []
        # row 1 holds the column headers, data starts on row 2
        for values in sheet.iter_rows(min_row=2, min_col=1, max_col=last_col, values_only=True):
            leaves.append(self._create_leaf_object(self.load_row(values, header)))

    def _create_leaf_object(self, row):
        """
        build a leaf with its title from one Documents worksheet row
        :param row: Documents worksheet row values as a dictionary
        :return: a leaf odmlib object
        """
        leaf = DEFINE.leaf(ID=row["ID"], href=row["Href"])
        leaf.title = DEFINE.title(_content=row["Title"])
        return leaf
36 |
--------------------------------------------------------------------------------
/xls2define/Methods.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class Methods(define_object.DefineObject):
    """ create a Define-XML v2.0 MethodDef element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang):
        """
        parse the Excel sheet and create odmlib MethodDef objects to return in the objects dictionary
        :param sheet: Excel worksheet object; must provide iter_rows and max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["MethodDef"] = []
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            item = self._create_methoddef_object(row_content)
            objects["MethodDef"].append(item)

    def _create_methoddef_object(self, row):
        """
        use the values from the Methods worksheet row to create a MethodDef odmlib object
        :param row: Methods worksheet row values as a dictionary
        :return: a MethodDef odmlib object
        """
        attr = {"OID": row["OID"], "Name": row["Name"], "Type": row["Type"]}
        method = DEFINE.MethodDef(**attr)
        tt = DEFINE.TranslatedText(_content=row["Description"], lang=self.lang)
        method.Description = DEFINE.Description()
        method.Description.TranslatedText.append(tt)
        # a FormalExpression is only added when the row names an expression context
        if row.get("Expression Context"):
            method.FormalExpression.append(DEFINE.FormalExpression(Context=row["Expression Context"], _content=row["Expression Code"]))
        if row.get("Document"):
            self._add_document(row, method)
        return method

    def _add_document(self, row, method):
        """
        creates a DocumentRef object using a row from the Methods Worksheet
        :param row: Methods worksheet row values as a dictionary
        :param method: odmlib MethodDef object that gets updated with a DocumentRef object
        """
        dr = DEFINE.DocumentRef(leafID=row["Document"])
        # Pages is optional: only add a PDFPageRef when page references are given, as the Comments worksheet does
        if row.get("Pages"):
            pdf = DEFINE.PDFPageRef(PageRefs=row["Pages"], Type="NamedDestination")
            dr.PDFPageRef.append(pdf)
        method.DocumentRef.append(dr)
53 |
--------------------------------------------------------------------------------
/xls2define/README.md:
--------------------------------------------------------------------------------
1 | # xls2define
2 |
3 | ## Introduction
4 | Use the xlsx2define2-1 example instead of this one. The Define-XML v2.1 examples are getting more use and testing
5 | creating study Define-XML files, so they're getting updated more frequently.
6 |
7 | The xls2define program is an odmlib example application that generates a Define-XML v2.0 file from
8 | an Excel spreadsheet that contains the study metadata needed to create the Define-XML file. The Excel
9 | spreadsheet version of the metadata makes it easier for many to edit or create new content to include in a
10 | Define-XML v2.0 file. The companion define2xls program takes the generated Define-XML file and creates
11 | a spreadsheet using the metadata. This example demonstrates some basic odmlib features.
12 |
13 | ## Getting Started
14 | To run xls2define.py from the command-line:
15 |
16 | `python xls2define.py -e ./data/odmlib-define-metadata.xlsx -d ./data/odmlib-roundtrip-define.xml`
17 |
18 | The odmlib package must be installed to run xls2define. See the
19 | [odmlib repository](https://github.com/swhume/odmlib) to get the odmlib package. Eventually, it
20 | may make its way into PyPI, but for now you'll need to install from the source. The odmlib
21 | README provides instructions for getting started.
22 |
23 | ## Limitations
24 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib.
25 | The examples are not complete, production ready applications.
26 |
27 | The odmlib package is still in development. Although odmlib supports all of ODM, more work remains
28 | to complete all features for processing ClinicalData. The initial focus has been on getting
29 | the metadata sections complete.
--------------------------------------------------------------------------------
/xls2define/Study.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class Study(define_object.DefineObject):
    """ build the Define-XML v2.0 Study element object and the initial MetaDataVersion object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang):
        """
        read the name/value pairs in the first two columns of the Study worksheet and create the Study and
        MetaDataVersion objects; the worksheet "Language" value replaces the lang argument
        :param sheet: Excel worksheet object; must provide iter_rows
        :param objects: dictionary of ODMLIB objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        study_values = {}
        # the Study worksheet has no header row: every row is an attribute name followed by its value
        for values in sheet.iter_rows(min_row=1, min_col=1, max_col=2, values_only=True):
            study_values.update(self._load_row(values))
        self.lang = study_values["Language"]
        objects["Study"] = self._create_study_object(study_values)
        objects["MetaDataVersion"] = self._create_metadataversion_object(study_values)

    def _create_study_object(self, rows):
        """
        build the Study object with its GlobalVariables from the Study worksheet values
        :param rows: dictionary created from the rows in the study worksheet
        :return: odmlib Study object
        """
        study = DEFINE.Study(OID=self.generate_oid(['ODM', rows["StudyName"]]))
        global_variables = DEFINE.GlobalVariables()
        global_variables.StudyName = DEFINE.StudyName(_content=rows["StudyName"])
        global_variables.StudyDescription = DEFINE.StudyDescription(_content=rows["StudyDescription"])
        global_variables.ProtocolName = DEFINE.ProtocolName(_content=rows["ProtocolName"])
        study.GlobalVariables = global_variables
        return study

    def _create_metadataversion_object(self, rows):
        """
        build the MetaDataVersion object from the Study worksheet values
        :param rows: dictionary created from the rows in the study worksheet
        :return: odmlib MetaDataVersion object
        """
        attributes = {
            "OID": self.generate_oid(["MDV", rows["StudyName"]]),
            "Name": "MDV " + rows["StudyName"],
            "Description": "Data Definitions for " + rows["StudyName"],
            "DefineVersion": "2.0.0",
            "StandardName": rows["StandardName"],
            "StandardVersion": rows["StandardVersion"],
        }
        return DEFINE.MetaDataVersion(**attributes)

    def _load_row(self, row_values):
        """
        convert one Study worksheet row into a single-entry dictionary
        :param row_values: sequence holding the attribute name in position 0 and its value in position 1
        :return: dictionary with the row attribute as key and value as dictionary value
        """
        return {row_values[0]: row_values[1]}
63 |
--------------------------------------------------------------------------------
/xls2define/ValueLevel.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class ValueLevel(define_object.DefineObject):
    """ build Define-XML v2.0 ValueListDef element objects and the ItemDefs they reference """
    def __init__(self):
        super().__init__()
        self.lookup_oid = None
        self.vld = None

    def create_define_objects(self, sheet, objects, lang):
        """
        for every data row of the ValueLevel worksheet make sure the ValueListDef exists, add an ItemRef to it
        and append the matching ItemDef to objects["ItemDef"]
        :param sheet: Excel worksheet object; must provide iter_rows and max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        last_col = self.sheet.max_column
        header = self.load_header(last_col)
        objects["ValueListDef"] = []
        # row 1 holds the column headers, data starts on row 2
        for values in sheet.iter_rows(min_row=2, min_col=1, max_col=last_col, values_only=True):
            content = self.load_row(values, header)
            self._create_valuelistdef_object(content, objects)
            self._create_itemref_object(content)
            self._create_itemdef_object(content, objects)

    def _create_valuelistdef_object(self, row, objects):
        """
        start a new ValueListDef when the row belongs to a different dataset/variable than the previous row
        :param row: ValueList worksheet row values as a dictionary
        :param objects: dictionary of odmlib objects updated by this method
        """
        parent_oid = self.generate_oid(["IT", row["Dataset"], row["Variable"]])
        # same parent variable as the previous row: keep adding to the current ValueListDef
        if parent_oid == self.lookup_oid:
            return
        self.lookup_oid = parent_oid
        self.vld = DEFINE.ValueListDef(OID=self.generate_oid(["VL", row["Dataset"], row["Variable"]]))
        objects["ValueListDef"].append(self.vld)

    def _create_itemref_object(self, row):
        """
        add an ItemRef with its WhereClauseRef to the current ValueListDef
        :param row: ValueList worksheet row values as a dictionary
        """
        # the item OID is derived from the where clause OID with its first three characters removed
        item_oid = self.generate_oid(["IT", row["Where Clause"][3:]])
        attributes = {"ItemOID": item_oid, "Mandatory": row["Mandatory"], "OrderNumber": int(row["Order"])}
        method = row.get("Method")
        if method:
            attributes["MethodOID"] = self.generate_oid(["MT", method])
        item_ref = DEFINE.ItemRef(**attributes)
        item_ref.WhereClauseRef.append(DEFINE.WhereClauseRef(WhereClauseOID=row["Where Clause"]))
        self.vld.ItemRef.append(item_ref)

    def _create_itemdef_object(self, row, objects):
        """
        build the ItemDef referenced by the ValueListDef ItemRef and append it to objects["ItemDef"]
        :param row: ValueList worksheet row values as a dictionary
        :param objects: dictionary of odmlib objects updated by this method
        """
        item_oid = self.generate_oid(["IT", row["Where Clause"][3:]])
        attributes = {"OID": item_oid, "Name": row["Variable"], "DataType": row["Data Type"],
                      "SASFieldName": row["Variable"]}
        self._add_optional_itemdef_attributes(attributes, row)
        item_def = DEFINE.ItemDef(**attributes)
        self._add_optional_itemdef_elements(item_def, row)
        objects["ItemDef"].append(item_def)

    def _add_optional_itemdef_elements(self, item, row):
        """
        add the CodeListRef (when given) and the Origin with its optional DocumentRef and Description
        :param item: ItemDef odmlib object updated with optional ELEMENTS
        :param row: ValueList worksheet row values as a dictionary
        """
        codelist = row.get("Codelist")
        if codelist:
            item.CodeListRef = DEFINE.CodeListRef(CodeListOID=self.generate_oid(["CL", codelist]))
        origin = DEFINE.Origin(Type=row["Origin"])
        item.Origin = origin
        pages = row.get("Pages")
        if pages:
            crf_ref = DEFINE.DocumentRef(leafID="LF.blankcrf")
            crf_ref.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=pages, Type="PhysicalRef"))
            origin.DocumentRef.append(crf_ref)
        predecessor = row.get("Predecessor")
        if predecessor:
            description = DEFINE.Description()
            origin.Description = description
            description.TranslatedText.append(DEFINE.TranslatedText(_content=predecessor))

    def _add_optional_itemdef_attributes(self, attr, row):
        """
        copy the optional ItemDef attributes that have a value in the worksheet row into attr
        :param attr: dictionary of ItemDef attributes updated with the optional values
        :param row: ValueList worksheet row values as a dictionary
        """
        # worksheet column -> ItemDef attribute, copied as-is when the cell has a value
        direct_columns = (("Length", "Length"), ("Significant Digits", "SignificantDigits"),
                          ("Format", "DisplayFormat"))
        for column, attribute in direct_columns:
            value = row.get(column)
            if value:
                attr[attribute] = value
        comment = row.get("Comment")
        if comment:
            attr["CommentOID"] = self.generate_oid(["COM", comment])
103 |
--------------------------------------------------------------------------------
/xls2define/Variables.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class Variables(define_object.DefineObject):
    """ build Define-XML v2.0 ItemDef element objects and the ItemRefs that link them to ItemGroupDefs """
    def __init__(self):
        super().__init__()
        self.lookup_oid = None
        self.igd = None

    def create_define_objects(self, sheet, objects, lang):
        """
        for every data row of the Variables worksheet create an ItemDef, add the ItemRef to its ItemGroupDef,
        and finally attach a leaf to every ItemGroupDef
        :param sheet: Excel worksheet object; must provide iter_rows and max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        last_col = self.sheet.max_column
        header = self.load_header(last_col)
        item_defs = objects["ItemDef"] = []
        # row 1 holds the column headers, data starts on row 2
        for values in sheet.iter_rows(min_row=2, min_col=1, max_col=last_col, values_only=True):
            content = self.load_row(values, header)
            item_def = self._create_itemdef_object(content)
            self._create_itemref_object(content, objects)
            item_defs.append(item_def)
        self._create_leaf_objects(objects)

    def _create_itemdef_object(self, row):
        """
        build an ItemDef with its Description and optional attributes and elements from one worksheet row
        :param row: Variables worksheet row values as a dictionary
        :return: odmlib ItemDef object
        """
        attributes = {"OID": self.generate_oid(["IT", row["Dataset"], row["Variable"]]), "Name": row["Variable"],
                      "DataType": row["Data Type"], "SASFieldName": row["Variable"]}
        self._add_optional_itemdef_attributes(attributes, row)
        item_def = DEFINE.ItemDef(**attributes)
        description = DEFINE.Description()
        description.TranslatedText.append(DEFINE.TranslatedText(_content=row["Label"], lang=self.lang))
        item_def.Description = description
        self._add_optional_itemdef_elements(item_def, row)
        return item_def

    def _add_optional_itemdef_elements(self, item, row):
        """
        add the CodeListRef and ValueListRef (when given) and the Origin with its optional DocumentRef and
        Description
        :param item: ItemDef odmlib object updated with optional ELEMENTS
        :param row: Variables worksheet row values as a dictionary
        """
        codelist = row.get("CodeList")
        if codelist:
            item.CodeListRef = DEFINE.CodeListRef(CodeListOID=codelist)
        origin = DEFINE.Origin(Type=row["Origin"])
        item.Origin = origin
        pages = row.get("Pages")
        if pages:
            crf_ref = DEFINE.DocumentRef(leafID="LF.blankcrf")
            crf_ref.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=pages, Type="PhysicalRef"))
            origin.DocumentRef.append(crf_ref)
        predecessor = row.get("Predecessor")
        if predecessor:
            description = DEFINE.Description()
            origin.Description = description
            description.TranslatedText.append(DEFINE.TranslatedText(_content=predecessor))
        valuelist = row.get("Valuelist")
        if valuelist:
            item.ValueListRef = DEFINE.ValueListRef(ValueListOID=valuelist)

    def _add_optional_itemdef_attributes(self, attr, row):
        """
        copy the optional ItemDef attributes that have a value in the worksheet row into attr
        :param attr: dictionary of ItemDef attributes updated with the optional values
        :param row: Variables worksheet row values as a dictionary
        """
        # worksheet column -> ItemDef attribute, copied as-is when the cell has a value
        optional_columns = (("Length", "Length"), ("Significant Digits", "SignificantDigits"),
                            ("Format", "DisplayFormat"), ("Comment", "CommentOID"))
        for column, attribute in optional_columns:
            value = row.get(column)
            if value:
                attr[attribute] = value

    def _create_itemref_object(self, row, objects):
        """
        create the ItemRef for the worksheet row and append it to the ItemGroupDef of the row's dataset
        :param row: Variables worksheet row values as a dictionary
        :param objects: dictionary of odmlib objects updated by this method
        """
        group_oid = self.generate_oid(["IG", row["Dataset"]])
        # only look the ItemGroupDef up again when the dataset differs from the previous row
        if group_oid != self.lookup_oid:
            self.lookup_oid = group_oid
            self.igd = self.find_object(objects["ItemGroupDef"], self.lookup_oid)
        if self.igd is None:
            raise ValueError(f"ItemGroupDef with OID {group_oid} is missing from the Datasets tab")
        attributes = {"ItemOID": self.generate_oid(["IT", row["Dataset"], row["Variable"]]),
                      "Mandatory": row["Mandatory"]}
        self._add_optional_itemref_attributes(attributes, row)
        self.igd.ItemRef.append(DEFINE.ItemRef(**attributes))

    def _add_optional_itemref_attributes(self, attr, row):
        """
        copy the optional ItemRef attributes that have a value in the worksheet row into attr
        :param attr: ItemRef object attributes to update with optional values
        :param row: Variables worksheet row values as a dictionary
        """
        # worksheet column, ItemRef attribute, and the conversion applied to the cell value (None = keep as-is)
        optional_columns = (("Method", "MethodOID", None), ("Order", "OrderNumber", int),
                            ("KeySequence", "KeySequence", int), ("Role", "Role", None))
        for column, attribute, convert in optional_columns:
            value = row.get(column)
            if not value:
                continue
            attr[attribute] = convert(value) if convert else value

    def _create_leaf_objects(self, objects):
        """
        give every ItemGroupDef in objects a leaf whose href and title are the lower-cased "<Name>.xpt"
        :param objects: dictionary of odmlib objects updated by this method
        """
        for item_group in objects["ItemGroupDef"]:
            leaf_id = self.generate_oid(["LF", item_group.Name])
            file_name = (item_group.Name + ".xpt").lower()
            leaf = DEFINE.leaf(ID=leaf_id, href=file_name)
            leaf.title = DEFINE.title(_content=file_name)
            item_group.leaf = leaf
129 |
--------------------------------------------------------------------------------
/xls2define/WhereClauses.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import define_object
3 |
4 |
class WhereClauses(define_object.DefineObject):
    """ build Define-XML v2.0 WhereClauseDef element objects from the WhereClauses worksheet """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang):
        """
        create one WhereClauseDef per run of rows sharing the same OID; each row contributes one RangeCheck
        :param sheet: Excel worksheet object; must provide iter_rows and max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        """
        self.lang = lang
        self.sheet = sheet
        last_col = self.sheet.max_column
        header = self.load_header(last_col)
        where_clauses = objects["WhereClauseDef"] = []
        previous_oid = ""
        for values in sheet.iter_rows(min_row=2, min_col=1, max_col=last_col, values_only=True):
            content = self.load_row(values, header)
            current_oid = content["OID"]
            if current_oid == previous_oid:
                # same OID as the previous row: this row only adds another RangeCheck
                range_check = self._create_rangecheck(content)
                where_clauses[-1].RangeCheck.append(range_check)
                continue
            where_clauses.append(self._create_whereclausedef_object(content))
            previous_oid = current_oid

    def _create_whereclausedef_object(self, row):
        """
        build a WhereClauseDef holding the RangeCheck described by the worksheet row
        :param row: WhereClauses worksheet row values as a dictionary
        :return: a WhereClause odmlib object
        """
        attributes = {"OID": row["OID"]}
        comment = row.get("Comment")
        if comment:
            attributes["CommentOID"] = self.generate_oid(["COM", comment])
        where_clause = DEFINE.WhereClauseDef(**attributes)
        where_clause.RangeCheck.append(self._create_rangecheck(row))
        return where_clause

    def _create_rangecheck(self, row):
        """
        build a RangeCheck with one CheckValue per ", "-separated entry of the row's Value
        :param row: WhereClauses worksheet row values as a dictionary
        :return: a RangeCheck odmlib object
        """
        range_check = DEFINE.RangeCheck(SoftHard="Soft",
                                        ItemOID=self.generate_oid(["IT", row["Dataset"], row["Variable"]]),
                                        Comparator=row["Comparator"])
        for check_value in row["Value"].split(", "):
            range_check.CheckValue.append(DEFINE.CheckValue(_content=check_value))
        return range_check
66 |
--------------------------------------------------------------------------------
/xls2define/data/SDTM-Metadata-Worksheet.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/xls2define/data/SDTM-Metadata-Worksheet.xlsx
--------------------------------------------------------------------------------
/xls2define/data/odmlib-define-metadata.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/xls2define/data/odmlib-define-metadata.xlsx
--------------------------------------------------------------------------------
/xls2define/define_object.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 |
3 |
class DefineObject(ABC):
    """ base class providing the worksheet and OID helpers shared by the classes that create Define-XML objects """
    def __init__(self):
        self.sheet = None
        self.lang = "en"

    def load_row(self, row_values, header):
        """
        pair each header name with the cell value in the same position
        :param row_values: sequence of cell values for one worksheet row
        :param header: sequence of column names
        :return: dictionary keyed by column name; pairing stops at the shorter of the two sequences
        """
        return dict(zip(header, row_values))

    def load_header(self, num_cols):
        """
        read the first row of self.sheet as the list of column names
        :param num_cols: number of columns to read
        :return: list of the first row's cell values, or an empty list when the sheet yields no rows
        """
        header = []
        for row in self.sheet.iter_rows(min_row=1, max_row=1, min_col=1, max_col=num_cols, values_only=True):
            header = list(row)
        return header

    def generate_oid(self, descriptors):
        """
        build an upper-case OID by joining the descriptors with "."
        :param descriptors: list of strings; the first is the element type prefix (e.g. "IT")
        :return: the OID string, without repeating the prefix when descriptors[1] already starts with it
        """
        parts = descriptors
        # ensure the element type prefix is not already pre-pended to the OID; the comparison ignores case
        # because the result is upper-cased, so "it.dm.age" must not become "IT.IT.DM.AGE"
        if len(descriptors) > 1 and descriptors[1].upper().startswith(descriptors[0].upper() + "."):
            parts = descriptors[1:]
        return ".".join(parts).upper()

    def find_object(self, objects, oid):
        """
        return the first object whose OID attribute equals oid
        :param objects: iterable of objects that have an OID attribute
        :param oid: OID value to look for
        :return: the matching object or None when there is no match
        """
        return next((o for o in objects if oid == o.OID), None)
34 |
--------------------------------------------------------------------------------
/xls2define/odm.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 | import datetime
3 |
4 |
class ODM:
    """ create the root ODM element object for the Define-XML file """
    def __init__(self):
        self.attrs = self._set_attributes()

    def create_define_objects(self):
        """
        create the ODM odmlib object from the attributes set when this object was initialized
        :return: odmlib ODM object
        """
        return DEFINE.ODM(**self.attrs)

    def _set_attributes(self):
        """
        build the ODM element attributes
        :return: dictionary of ODM attribute names and values
        """
        # read the clock once so AsOfDateTime and CreationDateTime carry the same timestamp
        now = self._set_datetime()
        return {"FileOID": "ODM.DEFINE.TEST.001",
                "AsOfDateTime": now,
                "CreationDateTime": now, "ODMVersion": "1.3.2", "FileType": "Snapshot",
                "Originator": "Sam Hume", "SourceSystem": "ODMLib", "SourceSystemVersion": "0.1"}

    def _set_datetime(self):
        """return the current UTC datetime in ISO 8601 format"""
        # datetime.utcnow() is deprecated; now(timezone.utc) yields the same timezone-aware value directly
        return datetime.datetime.now(datetime.timezone.utc).isoformat()
22 |
--------------------------------------------------------------------------------
/xls2define/requirements.txt:
--------------------------------------------------------------------------------
1 | odmlib>=0.1.4
2 | xmlschema>=1.4.1
3 | openpyxl>=3.0.5
--------------------------------------------------------------------------------
/xls2define/supporting_docs.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_0 import model as DEFINE
2 |
3 |
class SupportingDocuments:
    """ create the AnnotatedCRF and SupplementalDoc objects that reference the supporting document leaf IDs """

    @staticmethod
    def create_annotatedcrf():
        """
        create an AnnotatedCRF object with a DocumentRef to the LF.blankcrf leaf
        :return: odmlib AnnotatedCRF object
        """
        annotated_crf = DEFINE.AnnotatedCRF()
        annotated_crf.DocumentRef = DEFINE.DocumentRef(leafID='LF.blankcrf')
        return annotated_crf

    @staticmethod
    def create_supplementaldoc():
        """
        create a SupplementalDoc object with DocumentRefs to the reviewers guide and complex algorithms leafs
        :return: odmlib SupplementalDoc object
        """
        supplemental = DEFINE.SupplementalDoc()
        for leaf_id in ('LF.ReviewersGuide', 'LF.ComplexAlgorithms'):
            supplemental.DocumentRef.append(DEFINE.DocumentRef(leafID=leaf_id))
        return supplemental
21 |
--------------------------------------------------------------------------------
/xlsx2define2-1/CodeLists.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class CodeLists(define_object.DefineObject):
    """ create a Define-XML v2.1 CodeList element object """
    def __init__(self):
        super().__init__()
        self.igd = None

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and create odmlib objects to return in the objects dictionary
        :param sheet: Excel worksheet object that provides max_column and iter_rows (openpyxl-style API)
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this class; accepted so all worksheet loaders share one signature
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["CodeList"] = []
        cl_c_code = ""
        cl_name = ""
        is_decode_item = False
        cl = None
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            # assumes when this is a new code list the names will not be the same
            if row_content["Name"] != cl_name:
                if cl_name:
                    self._add_previous_codelist_to_objects(cl_c_code, cl, objects)
                cl = self._create_codelist_object(row_content)
                cl_c_code = row_content.get("NCI Codelist Code")
                cl_name = row_content.get("Name")
                # assumption: if the first term has a decode element then create the list with decodes
                is_decode_item = bool(row_content["Decoded Value"])
            if is_decode_item:
                cl.CodeListItem.append(self._create_codelistitem_object(row_content))
            else:
                cl.EnumeratedItem.append(self._create_enumerateditem_object(row_content))
        # the last code list in the worksheet has no following row to trigger its addition
        self._add_previous_codelist_to_objects(cl_c_code, cl, objects)

    def _add_previous_codelist_to_objects(self, cl_c_code, cl, objects):
        """
        finish creating a codelist by adding an Alias if a c-code exists and adding the object to the list of codelists
        :param cl_c_code: codelist c-code
        :param cl: odmlib codelist object
        :param objects: dictionary of odmlib objects created from the Excel input file and updated in this method
        """
        if cl_c_code:
            cl.Alias.append(DEFINE.Alias(Context="nci:ExtCodeID", Name=cl_c_code))
        # add the code list to the list of code list objects
        if cl:
            objects["CodeList"].append(cl)

    def _create_codelist_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib CodeList object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: CodeList odmlib object
        """
        attr = {"OID": row["OID"], "Name": row["Name"], "DataType": row["Data Type"]}
        if row.get("Comment"):
            attr["CommentOID"] = row["Comment"]
        if row.get("IsNonStandard"):
            attr["IsNonStandard"] = row["IsNonStandard"]
        if row.get("StandardOID"):
            attr["StandardOID"] = row["StandardOID"]
        return DEFINE.CodeList(**attr)

    def _create_enumerateditem_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib EnumeratedItem object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: EnumeratedItem odmlib object
        """
        attr = {"CodedValue": row["Term"]}
        if row.get("Order"):
            attr["OrderNumber"] = row["Order"]
        en_item = DEFINE.EnumeratedItem(**attr)
        if row.get("NCI Term Code"):
            en_item.Alias.append(DEFINE.Alias(Context="nci:ExtCodeID", Name=row["NCI Term Code"]))
        return en_item

    def _create_codelistitem_object(self, row):
        """
        using the row from the Codelists worksheet create an odmlib CodeListItem object
        :param row: dictionary with contents from a row in the Codelists worksheet
        :return: CodeListItem odmlib object
        """
        attr = {"CodedValue": row["Term"]}
        if row.get("Order"):
            attr["OrderNumber"] = row["Order"]
        cl_item = DEFINE.CodeListItem(**attr)
        decode = DEFINE.Decode()
        # if no decode for this term then use the submission value
        decode_text = row["Decoded Value"] if row["Decoded Value"] else row["Term"]
        # honor the lang passed to create_define_objects instead of a hard-coded "en"
        decode.TranslatedText.append(DEFINE.TranslatedText(_content=decode_text, lang=self.lang))
        cl_item.Decode = decode
        if row.get("NCI Term Code"):
            cl_item.Alias.append(DEFINE.Alias(Context="nci:ExtCodeID", Name=row["NCI Term Code"]))
        return cl_item
117 |
--------------------------------------------------------------------------------
/xlsx2define2-1/Comments.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class Comments(define_object.DefineObject):
    """ create a Define-XML v2.1 CommentDef element object """
    def __init__(self):
        super().__init__()
        self.lookup_oid = None
        self.igd = None

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        read every data row of the Comments worksheet and store the resulting CommentDef objects in objects
        :param sheet: Excel worksheet object that provides max_column and iter_rows (openpyxl-style API)
        :param objects: dictionary of odmlib objects; its "CommentDef" entry is replaced by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this class
        """
        self.lang = lang
        self.sheet = sheet
        column_names = self.load_header(self.sheet.max_column)
        comment_defs = []
        objects["CommentDef"] = comment_defs
        data_rows = sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True)
        for values in data_rows:
            comment_defs.append(self._create_commentdef_object(self.load_row(values, column_names)))

    def _create_commentdef_object(self, row):
        """
        build a CommentDef odmlib object from one Comments worksheet row
        :param row: Comments worksheet row values as a dictionary
        :return: a CommentDef odmlib object with a Description and, when the row names one, a DocumentRef
        """
        comment_def = DEFINE.CommentDef(OID=row["OID"])
        text = DEFINE.TranslatedText(_content=row["Description"], lang=self.lang)
        comment_def.Description = DEFINE.Description()
        comment_def.Description.TranslatedText.append(text)
        if not row.get("Document"):
            return comment_def
        self._add_document(row, comment_def)
        return comment_def

    def _add_document(self, row, com):
        """
        attach a DocumentRef built from a Comments worksheet row to the CommentDef
        :param row: Comments worksheet row values as a dictionary
        :param com: odmlib CommentDef object that gets updated with a DocumentRef object
        """
        doc_ref = DEFINE.DocumentRef(leafID=row["Document"])
        # a page reference is only added when the row lists pages
        if row.get("Pages"):
            doc_ref.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=row["Pages"], Type="NamedDestination"))
        com.DocumentRef.append(doc_ref)
53 |
--------------------------------------------------------------------------------
/xlsx2define2-1/Datasets.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class Datasets(define_object.DefineObject):
    """ create a Define-XML v2.1 ItemGroupDef element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        read every data row of the Datasets worksheet and store the resulting ItemGroupDef objects in objects
        :param sheet: Excel worksheet object that provides max_column and iter_rows (openpyxl-style API)
        :param objects: dictionary of odmlib objects; its "ItemGroupDef" entry is replaced by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this class
        """
        self.lang = lang
        self.sheet = sheet
        column_count = self.sheet.max_column
        column_names = self.load_header(column_count)
        item_groups = []
        objects["ItemGroupDef"] = item_groups
        for values in sheet.iter_rows(min_row=2, min_col=1, max_col=column_count, values_only=True):
            item_groups.append(self._create_itemgroupdef_object(self.load_row(values, column_names)))

    def _create_itemgroupdef_object(self, row):
        """
        build an ItemGroupDef odmlib object from one Datasets worksheet row
        NOTE: since this example is based on the SDTMIG sub-class has not been implemented
        :param row: Datasets worksheet row values as a dictionary
        :return: odmlib ItemGroupDef object
        """
        attr = {
            "OID": self.generate_oid(["IG", row["Dataset"]]),
            "Name": row["Dataset"],
            "Repeating": row["Repeating"],
            "Domain": row["Dataset"],
            "SASDatasetName": row["Dataset"],
            "IsReferenceData": row["Reference Data"],
            "Purpose": row["Purpose"],
            "Structure": row["Structure"],
            "ArchiveLocationID": ".".join(["LF", row["Dataset"]]),
        }
        # optional attributes are only set when the worksheet cell has a value
        optional_columns = (("Comment", "CommentOID"), ("IsNonStandard", "IsNonStandard"),
                            ("StandardOID", "StandardOID"), ("HasNoData", "HasNoData"))
        for column, attribute in optional_columns:
            if row.get(column):
                attr[attribute] = row[column]
        item_group = DEFINE.ItemGroupDef(**attr)
        text = DEFINE.TranslatedText(_content=row["Description"], lang=self.lang)
        item_group.Description = DEFINE.Description()
        item_group.Description.TranslatedText.append(text)
        # spreadsheet has up to 1 Class per dataset, but spec allows for nested sub-classes
        if row.get("Class"):
            item_group.Class = DEFINE.Class(Name=row["Class"])
        return item_group
54 |
--------------------------------------------------------------------------------
/xlsx2define2-1/Dictionaries.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class Dictionaries(define_object.DefineObject):
    """ create a Define-XML v2.1 CodeList element object that references an external dictionary """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and add the odmlib CodeList objects it describes to the objects dictionary
        :param sheet: Excel worksheet object that provides max_column and iter_rows (openpyxl-style API)
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this class
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        # dictionaries are added to the existing code lists; create the list when no code lists were loaded first
        codelists = objects.setdefault("CodeList", [])
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            codelists.append(self._create_codelist_object(row_content))

    def _create_codelist_object(self, row):
        """
        using the row from the Dictionaries worksheet create an odmlib CodeList object and add ExternalCodeList
        :param row: dictionary with contents from a row in the Dictionaries worksheet
        :return: CodeList odmlib object with ExternalCodeList
        """
        cl = DEFINE.CodeList(OID=row["OID"], Name=row["Name"], DataType=row["Data Type"])
        attr = {"Dictionary": row["Dictionary"]}
        if row.get("Version"):
            attr["Version"] = row["Version"]
        cl.ExternalCodeList = DEFINE.ExternalCodeList(**attr)
        return cl
38 |
--------------------------------------------------------------------------------
/xlsx2define2-1/Documents.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class Documents(define_object.DefineObject):
    """ create a Define-XML v2.1 leaf element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        read every data row of the Documents worksheet and store the resulting leaf objects in objects
        :param sheet: Excel worksheet object that provides max_column and iter_rows (openpyxl-style API)
        :param objects: dictionary of odmlib objects; its "leaf" entry is replaced by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this class
        """
        self.lang = lang
        self.sheet = sheet
        column_names = self.load_header(self.sheet.max_column)
        leaves = []
        objects["leaf"] = leaves
        data_rows = sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True)
        for values in data_rows:
            leaves.append(self._create_leaf_object(self.load_row(values, column_names)))

    def _create_leaf_object(self, row):
        """
        build a leaf odmlib object from one Documents worksheet row
        :param row: Documents worksheet row values as a dictionary
        :return: a leaf odmlib object with its title set
        """
        document = DEFINE.leaf(ID=row["ID"], href=row["Href"])
        document.title = DEFINE.title(_content=row["Title"])
        return document
36 |
--------------------------------------------------------------------------------
/xlsx2define2-1/Methods.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class Methods(define_object.DefineObject):
    """ create a Define-XML v2.1 MethodDef element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and create odmlib objects to return in the objects dictionary
        :param sheet: Excel worksheet object that provides max_column and iter_rows (openpyxl-style API)
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this class
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["MethodDef"] = []
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            objects["MethodDef"].append(self._create_methoddef_object(row_content))

    def _create_methoddef_object(self, row):
        """
        use the values from the Methods worksheet row to create a MethodDef odmlib object
        :param row: Methods worksheet row values as a dictionary
        :return: a MethodDef odmlib object
        """
        attr = {"OID": row["OID"], "Name": row["Name"], "Type": row["Type"]}
        method = DEFINE.MethodDef(**attr)
        tt = DEFINE.TranslatedText(_content=row["Description"], lang=self.lang)
        method.Description = DEFINE.Description()
        method.Description.TranslatedText.append(tt)
        if row.get("Expression Context"):
            expression = DEFINE.FormalExpression(Context=row["Expression Context"], _content=row["Expression Code"])
            method.FormalExpression.append(expression)
        if row.get("Document"):
            self._add_document(row, method)
        return method

    def _add_document(self, row, method):
        """
        creates a DocumentRef object using a row from the Methods Worksheet
        :param row: Methods worksheet row values as a dictionary
        :param method: odmlib MethodDef object that gets updated with a DocumentRef object
        """
        dr = DEFINE.DocumentRef(leafID=row["Document"])
        # only reference pages when the row provides them, matching how the Comments worksheet is handled
        if row.get("Pages"):
            dr.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=row["Pages"], Type="NamedDestination"))
        method.DocumentRef.append(dr)
53 |
--------------------------------------------------------------------------------
/xlsx2define2-1/README.md:
--------------------------------------------------------------------------------
1 | # xlsx2define2-1
2 |
3 | ## Introduction
4 | The xlsx2define2-1 program is an odmlib example application that generates a Define-XML v2.1 file from
5 | an Excel spreadsheet that contains the study metadata needed to create the Define-XML file. The Excel
6 | spreadsheet version of the metadata makes it easier for many to edit or create new content to include in a
7 | Define-XML v2.1 file. The companion define2-1-to-xlsx program takes the generated Define-XML file and creates
8 | a spreadsheet using the metadata. This example demonstrates some basic odmlib features.
9 |
10 | ## Getting Started
11 | To run xls2define.py from the command-line:
12 |
13 | `python xls2define.py -e ./data/odmlib-define-metadata.xlsx -d ./data/odmlib-roundtrip-define.xml`
14 |
15 | Or, to run it with both XML schema validation (-v) and some basic conformance checking (-c):
16 |
17 | `-v -c -e ./data/odmlib-define-metadata.xlsx -d ./data/odmlib-roundtrip-define.xml
18 | -s "/home/sam/standards/DefineV211/schema/cdisc-define-2.1/define2-1-0.xsd"`
19 |
20 | The odmlib package must be installed to run xlsx2define2-1. See the
21 | [odmlib repository](https://github.com/swhume/odmlib) to install the odmlib source code and latest features.
22 | The odmlib package can also be installed from PyPi with the understanding that it is still in development
23 | so might not have everything available in the odmlib repository. It can be installed from PyPi using:
24 |
25 | `pip install odmlib`
26 |
27 | The odmlib README provides instructions for getting started.
28 |
29 | ## Limitations
30 | The odmlib examples are basic programs intended to demonstrate some of the basic capabilities of odmlib.
31 | The examples are not complete, production ready applications. However, I'm happy to update these applications
32 | to accommodate new features or bug fixes and will also review pull requests.
33 |
34 | The odmlib package is still in development.
--------------------------------------------------------------------------------
/xlsx2define2-1/Standards.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class Standards(define_object.DefineObject):
    """ create a Define-XML v2.1 Standards element object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse each row in the Standards Excel sheet and create odmlib objects to return in the objects dictionary
        :param sheet: Excel worksheet object that provides max_column and iter_rows (openpyxl-style API)
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this class
        """
        standards = DEFINE.Standards()
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        # keep the "ItemGroupDef" key available as before, but never discard datasets that were already loaded
        objects.setdefault("ItemGroupDef", [])
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            # rows without a value in the first column are treated as blank and skipped
            if row[0]:
                row_content = self.load_row(row, header)
                standards.Standard.append(self._create_standard_object(row_content))
        objects["Standards"] = standards

    def _create_standard_object(self, row):
        """
        use the values from the Standards worksheet row to create a Standard odmlib object
        :param row: Standards worksheet row values as a dictionary
        :return: odmlib Standard object
        """
        # Version is converted to a string because a numeric cell is not read as text
        attr = {"OID": row["OID"], "Name": row["Name"], "Type": row["Type"], "Version": str(row["Version"]),
                "Status": row["Status"]}
        if row.get("Publishing Set"):
            attr["PublishingSet"] = row["Publishing Set"]
        if row.get("Comment"):
            attr["CommentOID"] = row["Comment"]
        return DEFINE.Standard(**attr)
42 |
--------------------------------------------------------------------------------
/xlsx2define2-1/Study.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class Study(define_object.DefineObject):
    """ create a Define-XML v2.1 Study element object and initialize the MetaDataVersion object """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        read the name/value rows of the Study worksheet and store the Study and MetaDataVersion objects in objects
        :param sheet: Excel worksheet object that provides iter_rows (openpyxl-style API)
        :param objects: dictionary of ODMLIB objects updated by this method
        :param lang: xml:lang setting; replaced by the worksheet's "Language" value once the rows are read
        :param acrf: annotated CRF setting; replaced by the worksheet's "Annotated CRF" value once the rows are read
        """
        self.lang = lang
        self.acrf = acrf
        self.sheet = sheet
        settings = {}
        # the first two columns of each row hold a setting name and its value
        for name_and_value in sheet.iter_rows(min_row=1, min_col=1, max_col=2, values_only=True):
            settings.update(self._load_row(name_and_value))
        self.lang = settings["Language"]
        self.acrf = settings["Annotated CRF"]
        objects["Study"] = self._create_study_object(settings)
        objects["MetaDataVersion"] = self._create_metadataversion_object(settings)

    def _create_study_object(self, rows):
        """
        build the Study ODMLIB object with its GlobalVariables from the Study worksheet values
        :param rows: dictionary created from the rows in the study worksheet
        :return: odmlib Study object
        """
        study = DEFINE.Study(OID=self.generate_oid(['ODM', rows["StudyName"]]))
        global_variables = DEFINE.GlobalVariables()
        global_variables.StudyName = DEFINE.StudyName(_content=rows["StudyName"])
        global_variables.StudyDescription = DEFINE.StudyDescription(_content=rows["StudyDescription"])
        global_variables.ProtocolName = DEFINE.ProtocolName(_content=rows["ProtocolName"])
        study.GlobalVariables = global_variables
        return study

    def _create_metadataversion_object(self, rows):
        """
        build the MetaDataVersion ODMLIB object from the Study worksheet values
        :param rows: dictionary created from the rows in the study worksheet
        :return: odmlib MetaDataVersion object
        """
        oid = self.generate_oid(["MDV", rows["StudyName"]])
        study_name = rows["StudyName"]
        return DEFINE.MetaDataVersion(OID=oid, Name="MDV " + study_name,
                                      Description="Data Definitions for " + study_name, DefineVersion="2.1.0")

    def _load_row(self, row_values):
        """
        turn one Study worksheet row into a single-entry dictionary
        :param row_values: sequence whose first value is the setting name and second is the setting value
        :return: dictionary with the row attribute as key and value as dictionary value
        """
        return {row_values[0]: row_values[1]}
64 |
--------------------------------------------------------------------------------
/xlsx2define2-1/ValueLevel.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class ValueLevel(define_object.DefineObject):
    """ create a Define-XML v2.1 ValueListDef element object """
    def __init__(self):
        super().__init__()
        self.lookup_oid = None
        self.vld = None

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and create odmlib objects to return in the objects dictionary
        :param sheet: Excel worksheet object that provides max_column and iter_rows (openpyxl-style API)
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: leaf ID used for the DocumentRef of Origins that list pages
        """
        self.lang = lang
        self.acrf = acrf
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["ValueListDef"] = []
        vl_oid = ""
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            # consecutive rows with the same OID belong to the same ValueListDef
            if vl_oid != row_content["OID"]:
                self._create_valuelistdef_object(row_content, objects)
                vl_oid = row_content["OID"]
            self._create_itemref_object(row_content)
            self._create_itemdef_object(row_content, objects)

    def _create_valuelistdef_object(self, row, objects):
        """
        use the values from the ValueLevel worksheet row to create a ValueListDef odmlib object
        :param row: ValueList worksheet row values as a dictionary
        :param objects: dictionary of odmlib objects updated by this method
        """
        self.vld = DEFINE.ValueListDef(OID=row["OID"])
        objects["ValueListDef"].append(self.vld)

    def _create_itemref_object(self, row):
        """
        use the values from the ValueLevel worksheet row to create ItemRef objects for ValueListDef
        :param row: ValueList worksheet row values as a dictionary
        """
        attr = {"ItemOID": row["ItemOID"], "Mandatory": row["Mandatory"], "OrderNumber": int(row["Order"])}
        if row.get("Method"):
            attr["MethodOID"] = self.generate_oid(["MT", row["Method"]])
        item = DEFINE.ItemRef(**attr)
        item.WhereClauseRef.append(DEFINE.WhereClauseRef(WhereClauseOID=row["Where Clause"]))
        self.vld.ItemRef.append(item)

    def _create_itemdef_object(self, row, objects):
        """
        use the values from the ValueLevel worksheet row to create ItemDef objects referenced by ValueListDef ItemRefs
        :param row: ValueList worksheet row values as a dictionary
        :param objects: dictionary of odmlib objects updated by this method
        """
        attr = {"OID": row["ItemOID"], "Name": row["Variable"], "DataType": row["Data Type"]}
        self._add_optional_itemdef_attributes(attr, row)
        item = DEFINE.ItemDef(**attr)
        self._add_optional_itemdef_elements(item, row)
        # value level ItemDefs are added to the existing ItemDefs; create the list if none were loaded first
        objects.setdefault("ItemDef", []).append(item)

    def _add_optional_itemdef_elements(self, item, row):
        """
        use the values from the ValueList worksheet row to add the optional ELEMENTS to the ItemDef object
        :param item: ItemDef odmlib object updated with optional ELEMENTS
        :param row: ValueList worksheet row values as a dictionary
        """
        if row.get("Codelist"):
            item.CodeListRef = DEFINE.CodeListRef(CodeListOID=row.get("Codelist"))
        if not row.get("Origin Type"):
            # Predecessor and Pages are attached to the Origin, so without an Origin there is nothing to add them to
            if row.get("Predecessor") or row.get("Pages"):
                print(f"Skipping Predecessor/Pages for ItemDef {row['Variable']} because no Origin Type is provided")
            return
        # spreadsheet input only provides for 1 Origin, but multiple are supported by the spec
        attr = {"Type": row["Origin Type"]}
        if row.get("Origin Source"):
            attr["Source"] = row["Origin Source"]
        item.Origin.append(DEFINE.Origin(**attr))
        if row.get("Predecessor"):
            item.Origin[0].Description = DEFINE.Description()
            item.Origin[0].Description.TranslatedText.append(DEFINE.TranslatedText(_content=row["Predecessor"]))
        if row.get("Pages"):
            dr = DEFINE.DocumentRef(leafID=self.acrf)
            dr.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=row["Pages"], Type="PhysicalRef"))
            item.Origin[0].DocumentRef.append(dr)

    def _add_optional_itemdef_attributes(self, attr, row):
        """
        use the values from the ValueList worksheet row to add the optional attributes for the ItemDef object
        :param attr: dictionary of ItemDef attributes updated with the optional attributes
        :param row: ValueList worksheet row values as a dictionary
        """
        if len(row["Variable"]) < 9:
            attr["SASFieldName"] = row["Variable"]
        else:
            print(f"Skipping SASFieldName for ItemDef {row['Variable']} because it exceeds the 8 character limit")
        if row.get("Length"):
            attr["Length"] = row["Length"]
        if row.get("Significant Digits"):
            attr["SignificantDigits"] = row["Significant Digits"]
        if row.get("Format"):
            attr["DisplayFormat"] = row["Format"]
        if row.get("Comment"):
            attr["CommentOID"] = self.generate_oid(["COM", row["Comment"]])
110 |
--------------------------------------------------------------------------------
/xlsx2define2-1/WhereClauses.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import define_object
3 |
4 |
class WhereClauses(define_object.DefineObject):
    """ create Define-XML v2.1 WhereClauseDef element objects from the WhereClauses worksheet """
    def __init__(self):
        super().__init__()

    def create_define_objects(self, sheet, objects, lang, acrf):
        """
        parse the Excel sheet and create the odmlib objects to return in the objects dictionary
        :param sheet: worksheet object read through iter_rows / max_column
        :param objects: dictionary of odmlib objects updated by this method
        :param lang: xml:lang setting for TranslatedText
        :param acrf: not used by this method
        """
        self.lang = lang
        self.sheet = sheet
        header = self.load_header(self.sheet.max_column)
        objects["WhereClauseDef"] = []
        prev_oid = ""
        for row in sheet.iter_rows(min_row=2, min_col=1, max_col=self.sheet.max_column, values_only=True):
            row_content = self.load_row(row, header)
            oid = row_content["OID"]
            if oid != prev_oid:
                objects["WhereClauseDef"].append(self._create_whereclausedef_object(row_content))
                prev_oid = oid
            else:
                # same OID as the previous row: this row only adds another RangeCheck to that WhereClauseDef
                objects["WhereClauseDef"][-1].RangeCheck.append(self._create_rangecheck(row_content))

    def _create_whereclausedef_object(self, row):
        """
        use the values from the WhereClauses worksheet row to create a WhereClauseDef odmlib object
        :param row: WhereClauses worksheet row values as a dictionary
        :return: a WhereClauseDef odmlib object holding the RangeCheck described by the row
        """
        attr = {"OID": row["OID"]}
        if row.get("Comment"):
            attr["CommentOID"] = self.generate_oid(["COM", row["Comment"]])
        wc = DEFINE.WhereClauseDef(**attr)
        # first and continuation rows share one RangeCheck builder so they treat values identically
        wc.RangeCheck.append(self._create_rangecheck(row))
        return wc

    def _create_rangecheck(self, row):
        """
        use the values from the WhereClauses worksheet row to create a RangeCheck odmlib object
        :param row: WhereClauses worksheet row values as a dictionary
        :return: a RangeCheck odmlib object with one CheckValue per ", " separated value
        """
        item_oid = self.generate_oid(["IT", row["Dataset"], row["Variable"]])
        rc = DEFINE.RangeCheck(SoftHard="Soft", ItemOID=item_oid, Comparator=row["Comparator"])
        value = row["Value"]
        if value is None or value == "":
            # an empty cell still produces a single, empty CheckValue
            check_values = [""]
        else:
            # numeric cells are not strings, so convert before splitting the value list
            check_values = str(value).split(", ")
        for check_value in check_values:
            rc.CheckValue.append(DEFINE.CheckValue(_content=check_value))
        return rc
70 |
--------------------------------------------------------------------------------
/xlsx2define2-1/data/odmlib-define-metadata.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swhume/odmlib_examples/79c03feb12f4694d40ef2be18055bbdb05c97a9a/xlsx2define2-1/data/odmlib-define-metadata.xlsx
--------------------------------------------------------------------------------
/xlsx2define2-1/define_object.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 |
3 |
class DefineObject(ABC):
    """ base class providing worksheet row loading and OID helpers to the worksheet parsing classes """
    def __init__(self):
        self.sheet = None
        self.lang = "en"

    def load_row(self, row_values, header):
        """
        pair each header name with the cell value in the same position
        :param row_values: sequence of cell values for one worksheet row
        :param header: sequence of column header names
        :return: dictionary of cell values keyed by column header (extra items on either side are dropped)
        """
        return dict(zip(header, row_values))

    def load_header(self, num_cols):
        """
        read the first row of self.sheet as the list of column header names
        :param num_cols: number of columns to read from the first row
        :return: list of header cell values, or an empty list when the sheet yields no first row
        """
        header = []
        for row in self.sheet.iter_rows(min_row=1, max_row=1, min_col=1, max_col=num_cols, values_only=True):
            header = list(row)
        return header

    def generate_oid(self, descriptors):
        """
        build an upper-case, dot-separated OID from the descriptors
        :param descriptors: list of strings; the first is the element type prefix (e.g. "COM", "IT")
        :return: the OID string
        """
        # ensure the element type prefix is not already pre-pended to the OID; the comparison ignores case
        # because the result is upper-cased, so "com.x" must not become "COM.COM.X"
        if len(descriptors) > 1 and descriptors[1].upper().startswith(descriptors[0].upper() + "."):
            parts = descriptors[1:]
        else:
            parts = descriptors
        return ".".join(parts).upper()

    def find_object(self, objects, oid):
        """
        return the first object whose OID attribute equals oid
        :param objects: iterable of objects that have an OID attribute
        :param oid: OID value to look for
        :return: the matching object, or None when there is no match
        """
        return next((o for o in objects if oid == o.OID), None)
34 |
--------------------------------------------------------------------------------
/xlsx2define2-1/odm.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 | import datetime
3 |
4 |
class ODM:
    """ build the root ODM odmlib object from a fixed set of file attributes """
    def __init__(self):
        self.attrs = self._set_attributes()

    def create_define_objects(self):
        """
        create the ODM odmlib object from the attributes prepared at construction time
        :return: an ODM odmlib object
        """
        return DEFINE.ODM(**self.attrs)

    def _set_attributes(self):
        """
        assemble the ODM element attributes
        :return: dictionary of ODM attribute names and values
        """
        # read the clock once so AsOfDateTime and CreationDateTime always carry the same value
        timestamp = self._set_datetime()
        return {"FileOID": "ODM.DEFINE21.TEST.001", "AsOfDateTime": timestamp,
                "CreationDateTime": timestamp, "ODMVersion": "1.3.2", "FileType": "Snapshot",
                "Originator": "Sam Hume", "SourceSystem": "odmlib", "SourceSystemVersion": "0.2", "Context": "Other"}

    def _set_datetime(self):
        """return the current UTC datetime in ISO 8601 format"""
        # datetime.utcnow() is deprecated; now(timezone.utc) gives the same timezone-aware value directly
        return datetime.datetime.now(datetime.timezone.utc).isoformat()
21 |
--------------------------------------------------------------------------------
/xlsx2define2-1/requirements.txt:
--------------------------------------------------------------------------------
1 | odmlib>=0.1.4
2 | xmlschema>=1.10.0
3 | openpyxl>=3.0.9
--------------------------------------------------------------------------------
/xlsx2define2-1/supporting_docs.py:
--------------------------------------------------------------------------------
1 | from odmlib.define_2_1 import model as DEFINE
2 |
3 |
class SupportingDocuments:
    """ create the AnnotatedCRF and SupplementalDoc odmlib objects that reference document leaves """

    @staticmethod
    def create_annotatedcrf(annotated_crf):
        """
        create an AnnotatedCRF object holding a DocumentRef to the annotated CRF leaf
        :param annotated_crf: leaf ID of the annotated CRF document
        :return: an AnnotatedCRF odmlib object
        """
        acrf = DEFINE.AnnotatedCRF()
        acrf.DocumentRef = DEFINE.DocumentRef(leafID=annotated_crf)
        return acrf

    @staticmethod
    def create_supplementaldoc(annotated_crf, leaf_objects):
        """
        create a SupplementalDoc object referencing every leaf other than the annotated CRF
        :param annotated_crf: leaf ID of the annotated CRF document, excluded from the result
        :param leaf_objects: leaf objects exposing an ID attribute; may be None or empty
        :return: a SupplementalDoc odmlib object, or None when leaf_objects is None or empty
        """
        if not leaf_objects:
            # covers None as well as an empty collection, so the loop below never iterates over None
            return None
        sdoc = DEFINE.SupplementalDoc()
        # NOTE(review): if every leaf is the annotated CRF this returns a SupplementalDoc with no
        # DocumentRef, as before - confirm that callers accept that
        for lo in leaf_objects:
            if lo.ID != annotated_crf:
                sdoc.DocumentRef.append(DEFINE.DocumentRef(leafID=lo.ID))
        return sdoc
21 |
--------------------------------------------------------------------------------