├── .gitignore
├── tools
├── environment.yml
├── check_name_rules.py
├── check_xml_unique.py
├── lib
│ └── xml_tools.py
├── ccpp_meta_stdname_check.py
└── write_standard_name_table.py
├── LICENSE
├── CODEOWNERS
├── README.md
├── .github
├── PULL_REQUEST_TEMPLATE
└── workflows
│ └── pull_request_ci.yml
├── standard_names_v1_0.xsd
└── StandardNamesRules.rst
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 |
--------------------------------------------------------------------------------
/tools/environment.yml:
--------------------------------------------------------------------------------
1 | name: test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - pyyaml
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2020, NOAA, UCAR/NCAR CU/CIRES
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 |
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # Lines starting with '#' are comments.
2 | # Each line is a file pattern followed by one or more owners.
3 |
4 | # These owners will be the default owners for everything in the repo.
5 |
6 | * @cacraigucar @climbfuji @dustinswales @gold2718 @grantfirl @mattldawson @mkavulich @mwaxmonsky @nusbaume @peverwhee @MarekWlasak @svahl991 @ss421
7 |
8 | # Order is important. The last matching pattern has the most precedence.
9 | # So if a pull request only touches javascript files, only these owners
10 | # will be requested to review.
11 | #*.js @octocat @github/js
12 |
13 | # You can also use email addresses if you prefer.
14 | #docs/* docs@example.com
15 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ESMStandardNames
2 |
3 | The Earth System Modeling Standard Names Repository contains community-accepted Standard Names, publishing tools, and search tools.
4 |
5 | Rules governing the designation and format of standard names can be found in [StandardNamesRules.rst](https://github.com/ESCOMP/ESMStandardNames/blob/main/StandardNamesRules.rst).
6 |
7 | A [Markdown file describing the standard names is included](https://github.com/ESCOMP/ESMStandardNames/blob/main/Metadata-standard-names.md), as well as a [YAML version of the XML file](https://github.com/ESCOMP/ESMStandardNames/blob/main/Metadata-standard-names.yaml).
8 |
9 | Edits to standard names must be made in the XML file `standard_names.xml` only. When a pull request is opened into the main branch, the YAML and Markdown files should be updated using the `tools/write_standard_name_table.py` script. This can be done manually by the pull request author, or by activating the GitHub action available on an open pull request.
10 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE:
--------------------------------------------------------------------------------
1 |
2 |
3 |
19 |
20 |
43 |
44 | ## Description
45 |
46 |
47 | ## Issues
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/standard_names_v1_0.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/tools/check_name_rules.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """
4 | Check standard names database file for violations of standard name character rules
5 | """
6 |
7 | import argparse
8 | import sys
9 | import os.path
10 | import re
11 | import xml.etree.ElementTree as ET
12 |
13 | ################################################
14 | # Add lib modules to python path
15 | ################################################
16 |
17 | _CURR_DIR = os.path.dirname(os.path.abspath(__file__))
18 | sys.path.append(os.path.join(_CURR_DIR, "lib"))
19 |
20 | #######################################
21 | #Import needed framework python modules
22 | #######################################
23 |
24 | from xml_tools import find_schema_file, find_schema_version, validate_xml_file, read_xml_file
25 |
def main():
    """Parse the standard names database file and output a dictionary
    where the keys are any standard names in violation of character rules,
    and the values are lists of the specific rules violated.

    Raises:
        ValueError: if the schema file cannot be found or validation fails.
        Exception: if any standard name violates the character rules, or
            any element in the file contains non-ASCII characters.
    """
    # Parse arguments
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-s", "--standard_name_file",
                        metavar='', required=True,
                        type=str, help="XML file with standard name library")
    args = parser.parse_args()

    stdname_file = os.path.abspath(args.standard_name_file)
    _, root = read_xml_file(stdname_file)

    # Validate the XML file against its schema; the schema file is expected
    # to live next to the XML file and share its basename.
    version = find_schema_version(root)
    schema_name = os.path.basename(stdname_file)[0:-4]
    schema_root = os.path.dirname(stdname_file)
    schema_path = os.path.join(schema_root, schema_name)
    schema_file = find_schema_file(schema_path, version)
    if not schema_file:
        raise ValueError(f'Cannot find schema file, {schema_name}, for {version=}')
    try:
        validate_xml_file(stdname_file, schema_name, version, None,
                          schema_path=schema_root, error_on_noxmllint=True)
    except ValueError as verr:
        raise ValueError(f"Invalid standard names file, {stdname_file}") from verr

    # Parse list of standard names and see if any names violate one or more rules
    violators = {}
    first_char_re = re.compile(r'[a-z]')
    valid_name_chars = re.compile(r'[a-z0-9_]')
    for name in root.findall('./section/standard_name'):
        sname = name.attrib['name']
        violations = []
        # Guard against empty names before indexing the first character
        if not sname or not first_char_re.fullmatch(sname[0]):
            violations.append('First character is not a lowercase letter')
        # sub() strips all legal characters; anything left over is illegal
        testchars = valid_name_chars.sub('', sname)
        if testchars:
            violations.append(f'Invalid characters are present: "{testchars}"')

        # If any violations were detected, add an entry to "violators" dictionary
        if violations:
            violators[sname] = violations

    if violators:
        raise Exception(f"Violating standard names found:\n{violators}")

    # Check the serialized document for non-ascii characters (ord > 127)
    for elem in ET.tostringlist(root, encoding='unicode'):
        badchars = ''.join(ch for ch in elem if ord(ch) > 127)
        if badchars:
            violators[elem] = f'Non-ascii characters found: {badchars}'

    if violators:
        raise Exception(f"Violating entries found:\n{violators}")

    print(f'Success! All entries in {args.standard_name_file} follow the rules.')

if __name__ == "__main__":
    main()
94 |
--------------------------------------------------------------------------------
/.github/workflows/pull_request_ci.yml:
--------------------------------------------------------------------------------
1 | name: Pull request checks
2 |
3 | on:
4 | workflow_dispatch:
5 | pull_request:
6 | branches:
7 | - main
8 | - release/*
9 |
10 | jobs:
11 | check-unique-standard-names:
12 | name: Check for duplicates in standard names
13 | runs-on: ubuntu-latest
14 | steps:
15 | - name: Checkout repository
16 | uses: actions/checkout@v4
17 |
18 | - name: Setup Python
19 | uses: actions/setup-python@v4
20 | with:
21 | python-version: "3.x"
22 |
23 | - name: Install dependencies
24 | run: |
25 | sudo apt-get update
26 | sudo apt-get -y install libxml2-utils
27 |
28 | - name: Check for duplicate standard names, descriptions
29 | run: |
30 | tools/check_xml_unique.py standard_names.xml
31 | tools/check_xml_unique.py standard_names.xml --field="description"
32 |
33 | check-name-rules:
34 | name: Check standard names against rules
35 | runs-on: ubuntu-latest
36 |
37 | steps:
38 | - name: Checkout repository
39 | uses: actions/checkout@v4
40 |
41 | - name: Setup Python
42 | uses: actions/setup-python@v4
43 | with:
44 | python-version: "3.x"
45 |
46 | - name: Install dependencies
47 | run: |
48 | sudo apt-get update
49 | sudo apt-get -y install libxml2-utils
50 |
51 | - name: Check standard names against character rules
52 | run: |
53 | python3 tools/check_name_rules.py -s standard_names.xml
54 |
55 | test-rendering:
56 | name: Test rendering xml file to markdown and yaml
57 | runs-on: ubuntu-latest
58 | steps:
59 | - name: Checkout repository
60 | uses: actions/checkout@v4
61 |
62 | - name: Setup Python
63 | uses: actions/setup-python@v4
64 | with:
65 | python-version: "3.x"
66 |
67 | - name: Install dependencies
68 | run: |
69 | sudo apt-get update
70 | sudo apt-get -y install libxml2-utils
71 | python -m pip install --upgrade pip
72 | python -m pip install PyYaml
73 |
74 | - name: Test rendering xml file to markdown
75 | run: |
76 | # Checks if the saved markdown matches freshly rendered markdown.
77 | # If this fails, prompt user to update
78 | tools/write_standard_name_table.py --output-format md standard_names.xml
79 | if ! git diff --exit-code --quiet; then
80 | echo "❌ Detected that Metadata-standard-names.md is not consistent with standard_names.xml"
81 | echo "✅ To fix: Run the following command locally and commit the result:"
82 | echo " tools/write_standard_name_table.py --output-format md standard_names.xml"
83 | echo "📘 This script requires the pyyaml Python package; to install with pip use command:"
84 | echo " python -m pip install PyYaml"
85 | echo "📘 For conda users, environment file tools/environment.yml is provided."
86 | echo
87 | exit 1
88 | fi
89 |
90 | - name: Test rendering xml file to yaml
91 | run: |
92 | tools/write_standard_name_table.py --output-format yaml standard_names.xml
93 | if ! git diff --exit-code --quiet; then
94 | echo "❌ Detected that Metadata-standard-names.yaml is not consistent with standard_names.xml"
95 | echo "✅ To fix: Run the following command locally and commit the result:"
96 | echo " tools/write_standard_name_table.py --output-format yaml standard_names.xml"
97 | echo "📘 This script requires the pyyaml Python package; to install with pip use command:"
98 | echo " python -m pip install PyYaml"
99 | echo "📘 For conda users, environment file tools/environment.yml is provided."
100 | echo
101 | exit 1
102 | fi
103 |
104 |
--------------------------------------------------------------------------------
/tools/check_xml_unique.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """
3 | Check for (and, with --overwrite, remove) duplicates in a metadata standard-name XML library file.
5 | """
6 |
7 | import argparse
8 | import sys
9 | import os.path
10 | import xml.etree.ElementTree as ET
11 | import copy
12 |
13 | ################################################
14 | # Add lib modules to python path
15 | ################################################
16 |
17 | _CURR_DIR = os.path.dirname(os.path.abspath(__file__))
18 | sys.path.append(os.path.join(_CURR_DIR, "lib"))
19 |
20 | #######################################
21 | #Import needed framework python modules
22 | #######################################
23 |
24 | from xml_tools import find_schema_file, find_schema_version, validate_xml_file, read_xml_file
25 |
26 | ###############################################################################
def parse_command_line(args, description):
    """Build the argument parser and parse the token list *args*.

    Returns the parsed argparse namespace.
    """
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("standard_name_file",
                        metavar='',
                        type=str, help="XML file with standard name library")
    parser.add_argument("--overwrite", action='store_true',
                        help="flag to remove duplicates and overwrite the file")
    parser.add_argument("--field", type=str, default="name",
                        help="Field to check for uniqueness; default is 'name'")
    parser.add_argument("--debug", action='store_true',
                        help="flag for additional debug print statements")
    return parser.parse_args(args)
44 |
45 | ###############################################################################
def main_func():
    """Parse the standard names database file and notify of duplicates.

    Duplicates are detected on the attribute named by --field (default
    "name").  With --overwrite, all but the first occurrence of each
    duplicated entry are removed and the file is rewritten in place;
    otherwise the script exits with status 1 when duplicates exist.
    """
    # Parse command line arguments
    args = parse_command_line(sys.argv[1:], __doc__)
    stdname_file = os.path.abspath(args.standard_name_file)
    tree, root = read_xml_file(stdname_file)

    # Validate the XML file; schema lives next to the XML file and shares
    # its basename.
    version = find_schema_version(root)
    schema_name = os.path.basename(stdname_file)[0:-4]
    schema_root = os.path.dirname(stdname_file)
    schema_path = os.path.join(schema_root, schema_name)
    schema_file = find_schema_file(schema_path, version)
    if not schema_file:
        raise ValueError(f'Cannot find schema file, {schema_name}, for {version=}')
    try:
        validate_xml_file(stdname_file, schema_name, version, None,
                          schema_path=schema_root, error_on_noxmllint=True)
    except ValueError as verr:
        raise ValueError(f"Invalid standard names file, {stdname_file}") from verr

    # Collect the requested field from every standard_name entry
    all_std_names = []
    for name in root.findall('./section/standard_name'):
        try:
            all_std_names.append(name.attrib[args.field])
        except KeyError:
            if args.debug:
                print(f"WARNING: no field '{args.field}' for standard name '{name.attrib['name']}' ")

    # Find duplicated values, preserving source order
    seen = set()
    dup_std_names = []
    for val in all_std_names:
        if val in seen:
            dup_std_names.append(val)
        else:
            seen.add(val)

    if not dup_std_names:
        print(f'No duplicate {args.field}s were found.')
        return

    print(f'The following duplicate {args.field} entries were found:')
    for dup in dup_std_names:
        rm_elements = root.findall(f'./section/standard_name[@{args.field}="{dup}"]')[1:]
        print(f"{dup}, ({len(rm_elements)} duplicate(s))")
    if args.overwrite:
        print(f'Removing duplicates and overwriting {stdname_file}')
        for dup in dup_std_names:
            first_use = True  # Indicates the first use of the duplicated value
            # BUG FIX: match on the field being checked (--field), not
            # unconditionally on "name", so removal agrees with detection.
            rm_parents = root.findall(f'./section/standard_name[@{args.field}="{dup}"]/..')
            for par in rm_parents:
                for ele in par.findall(f'./standard_name[@{args.field}="{dup}"]'):
                    if first_use:
                        # Keep the first occurrence; remove all later ones
                        first_use = False
                    else:
                        par.remove(ele)
        # Overwrite the xml file with the new, duplicate-free element tree:
        tree.write(stdname_file, "utf-8")
    else:
        # If not overwriting, exit with status 1 to indicate failure
        sys.exit(1)


###############################################################################
if __name__ == "__main__":
    main_func()
119 |
--------------------------------------------------------------------------------
/tools/lib/xml_tools.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Parse and / or validate an XML file and return the captured variables.
5 | """
6 |
7 | # Python library imports
8 | from __future__ import print_function
9 | import os
10 | import os.path
11 | import subprocess
12 | import sys
13 | import logging
14 | from shutil import which
15 | import xml.etree.ElementTree as ET
16 | try:
17 | _XMLLINT = which('xmllint')
18 | except ImportError:
19 | _XMLLINT = None
20 | # end try
21 |
22 | # Find python version
23 | PY3 = sys.version_info[0] > 2
24 | PYSUBVER = sys.version_info[1]
25 | _LOGGER = None
26 |
27 | ###############################################################################
def call_command(commands, logger, silent=False):
    """
    Try a command line and return True on success (False on failure).

    <commands> is a list of command-line tokens (run with shell=False).
    If <logger> is None, the command is always run silently.
    On failure, a RuntimeError describing the failed command is raised
    unless <silent> is True, in which case False is returned.

    >>> call_command(['ls', 'really__improbable_fffilename.foo'], _LOGGER) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    RuntimeError: Execution of 'ls really__improbable_fffilename.foo' failed:
    [Errno 2] No such file or directory
    >>> call_command(['ls', 'really__improbable_fffilename.foo'], _LOGGER, silent=True)
    False
    >>> call_command(['ls'], _LOGGER)
    True
    """
    result = False
    if logger is None:
        silent = True
    # end if
    try:
        # stdout/stderr=PIPE works on every supported Python 3 version
        # (capture_output requires 3.7+); Python 2 support is dropped --
        # all tool scripts in this repo use python3 shebangs and f-strings.
        cproc = subprocess.run(commands, check=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        if not silent:
            logger.debug(cproc.stdout)
        # end if
        result = cproc.returncode == 0
    except (OSError, RuntimeError, subprocess.CalledProcessError) as err:
        if silent:
            result = False
        else:
            cmd = ' '.join(commands)
            # OSError (e.g. command not found) has no returncode/output
            # attributes, so use getattr to avoid a masking AttributeError.
            emsg = "Execution of '{}' failed with code {}:\n"
            outstr = emsg.format(cmd, getattr(err, 'returncode', None))
            outstr += "{}".format(getattr(err, 'output', err))
            raise RuntimeError(outstr) from err
        # end if
    # end of try
    return result
88 |
89 | ###############################################################################
def find_schema_version(root):
    """
    Find and return the [major, minor] schema version of the XML tree
    rooted at <root>, read from its "version" attribute.

    >>> find_schema_version(ET.fromstring('<entries version="1.0"/>'))
    [1, 0]
    >>> find_schema_version(ET.fromstring('<entries version="1.a"/>')) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ValueError: Illegal version string, '1.a'
    >>> find_schema_version(ET.fromstring('<entries version="0.0"/>')) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ValueError: Illegal version string, '0.0'
    """
    if 'version' not in root.attrib:
        raise ValueError("version attribute required")
    # end if
    version = root.attrib['version']
    verbits = None
    try:
        versplit = version.split('.')
        if len(versplit) != 2:
            raise ValueError('oops')
        # end if (no else needed)
        # int() raises ValueError itself; no need to catch and re-raise
        verbits = [int(x) for x in versplit]
        if verbits[0] < 1:
            raise ValueError('Major version must be at least 1')
        # end if
        if verbits[1] < 0:
            raise ValueError('Minor version must be non-negative')
        # end if
    except ValueError as verr:
        # NOTE(review): the second line of this message was garbled in
        # extraction; reconstructed as "<integer>.<integer>" -- confirm
        # against upstream.
        errstr = """Illegal version string, '{}'
        Format must be <integer>.<integer>."""
        ve_str = str(verr)
        if ve_str:
            errstr = ve_str + '\n' + errstr
        # end if
        raise ValueError(errstr.format(version)) from verr
    # end try
    return verbits
140 |
141 | ###############################################################################
def find_schema_file(schema_root, version, schema_path=None):
    """Return the path of the schema file derived from <schema_root> and
    <version> ([major, minor]), or None when no such file exists.
    When <schema_path> is given, look for the file in that directory;
    otherwise look in the current directory."""

    major_minor = '_'.join(str(part) for part in version)
    candidate = "{}_v{}.xsd".format(schema_root, major_minor)
    if schema_path:
        candidate = os.path.join(schema_path, candidate)
    # end if
    return candidate if os.path.exists(candidate) else None
160 |
161 | ###############################################################################
def validate_xml_file(filename, schema_root, version, logger,
                      schema_path=None, error_on_noxmllint=False):
    """
    Locate the schema matching <schema_root> and <version> and validate
    the XML file <filename> against it using xmllint.
    """
    # Guard clauses: the XML file must exist and be readable
    if not os.path.isfile(filename):
        raise ValueError("validate_xml_file: Filename, '{}', does not exist".format(filename))
    # end if
    if not os.access(filename, os.R_OK):
        raise ValueError("validate_xml_file: Cannot open '{}'".format(filename))
    # end if
    if not schema_path:
        # Default schema location: the 'schema' directory two levels above
        # this library file
        this_path = os.path.abspath(__file__)
        repo_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_path)))
        schema_path = os.path.join(repo_dir, 'schema')
    # end if
    schema_file = find_schema_file(schema_root, version, schema_path)
    if not (schema_file and os.path.isfile(schema_file)):
        verstring = '.'.join([str(x) for x in version])
        emsg = """validate_xml_file: Cannot find schema for version {},
        {} does not exist"""
        raise ValueError(emsg.format(verstring, schema_file))
    # end if
    if not os.access(schema_file, os.R_OK):
        emsg = "validate_xml_file: Cannot open schema, '{}'"
        raise ValueError(emsg.format(schema_file))
    # end if
    if _XMLLINT is None:
        # No xmllint on this system: either fail hard or warn and proceed
        lmsg = "xmllint not found, could not validate file {}"
        if error_on_noxmllint:
            raise ValueError("validate_xml_file: " + lmsg.format(filename))
        # end if
        if logger is not None:
            logger.warning(lmsg.format(filename))
        # end if
        return True # We could not check but still need to proceed
    # end if
    if logger is not None:
        lmsg = "Checking file {} against schema {}"
        logger.debug(lmsg.format(filename, schema_file))
    # end if
    return call_command([_XMLLINT, '--noout', '--schema', schema_file, filename],
                        logger)
210 |
211 | ###############################################################################
def read_xml_file(filename, logger=None):
    """Read the XML file <filename> and return its (tree, root) pair.

    Raises ValueError when the file is missing, unreadable, or not
    well-formed XML.
    """
    # BUG FIX: check existence first; previously a nonexistent file fell
    # into the "not os.access" branch and was reported as "Cannot open".
    if not os.path.isfile(filename):
        emsg = "read_xml_file: Filename, '{}', does not exist"
        raise ValueError(emsg.format(filename))
    # end if
    if not os.access(filename, os.R_OK):
        raise ValueError("read_xml_file: Cannot open '{}'".format(filename))
    # end if
    # Python 2 fallback removed; all callers are python3 scripts.
    with open(filename, 'r', encoding='utf-8') as file_:
        try:
            tree = ET.parse(file_)
            root = tree.getroot()
        except ET.ParseError as perr:
            emsg = "read_xml_file: Cannot read {}, {}"
            raise ValueError(emsg.format(filename, perr)) from perr
        # end try
    # end with
    if logger:
        logger.debug("Read XML file, '{}'".format(filename))
    # end if
    return tree, root
238 |
239 | ###############################################################################
240 |
if __name__ == "__main__":
    # Running this module directly executes its doctests, with logging
    # routed to a NullHandler so nothing leaks to the console.
    _LOGGER = logging.getLogger('xml_tools')
    for _handler in list(_LOGGER.handlers):
        _LOGGER.removeHandler(_handler)
    # end for
    _LOGGER.addHandler(logging.NullHandler())
    try:
        # First, run doctest
        import doctest
        doctest.testmod()
    except ValueError as cerr:
        print("{}".format(cerr))
    # no else:
--------------------------------------------------------------------------------
/tools/ccpp_meta_stdname_check.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """
4 |
5 | This tool checks if all of the
6 | standard names present in a
7 | CCPP metadata file also exist
8 | in the standard names dictionary.
9 |
10 | The tool currently has two options:
11 |
12 | 1. A path to a single metadata file
13 | is passed, in which case only that
14 | file's standard names are checked, e.g.:
15 |
16 | ./meta_stdname_check --metafile-loc /path/to/file.meta --stdname-dict /path/to/dict.xml
17 |
18 | 2. A path to a directory is passed, in
19 | which case the directory is searched,
20 | along with any subdirectories, for
21 | metadata files, and all found files'
22 | standard names are checked, e.g.:
23 |
24 | ./meta_stdname_check --metafile-loc /meta/path/ --stdname-dict /path/to/dict.xml
25 |
26 | """
27 |
28 | ######################################
29 | #Import needed standard python modules
30 | ######################################
31 |
32 | import argparse
33 | import sys
34 | import os
35 | import os.path
36 | import datetime
37 | from collections import OrderedDict
38 |
39 | ################################################
40 | # Add lib modules to python path
41 | ################################################
42 |
43 | _CURR_DIR = os.path.dirname(os.path.abspath(__file__))
44 | sys.path.append(os.path.join(_CURR_DIR, "lib"))
45 |
46 | #######################################
47 | #Import needed framework python modules
48 | #######################################
49 |
50 | from xml_tools import read_xml_file
51 |
52 | #################
53 | #Helper functions
54 | #################
55 |
56 | #++++++++++++++++++++++++++++++
57 | #Input Argument parser function
58 | #++++++++++++++++++++++++++++++
59 |
def parse_arguments():

    """
    Parse command-line input arguments with argparse and return the
    (metafile location, standard-name dictionary location) pair.
    """

    # Description shown in --help output (same text as before):
    desc = ("Check if the metafile contains variable standard names\n"
            "that are not in the provided standard names dictionary.")

    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument('-m', '--metafile-loc',
                        metavar='',
                        action='store', type=str,
                        help="Location of metadata file(s)")

    parser.add_argument('-s', '--stdname-dict',
                        metavar='',
                        action='store', type=str,
                        help="Location of standard name dictionary (XML file)")

    args = parser.parse_args()
    return args.metafile_loc, args.stdname_dict
90 |
91 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
92 | #Function to extract standard names from element tree root
93 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
94 |
def get_dict_stdnames(xml_tree_root):

    """
    Collect the "name" attribute of every standard_name element found
    under a section element and return the names as a set.
    """

    return {entry.attrib['name']
            for entry in xml_tree_root.findall('./section/standard_name')}
113 |
114 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
115 | #Function to parse a list of strings from a metadata file
116 | #in order to find all standard names
117 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
118 |
def find_metafile_stdnames(metafile_obj):

    """
    Find all metadata lines of the form "standard_name = <value>" and
    return the set of values found.  Text after a comment delimiter (#)
    is ignored.

    NOTE:

    The CCPP-framework has much more advanced parsers
    that can extract this same info, but bringing them
    into this repo would require many additional
    supporting source files to be brought in as well.

    However, if it is found that this simplified parser
    is hitting too many edge cases then it might be wise
    to use the actual CCPP-framework parser instead of
    expanding on this function or script.
    """

    # Create empty set to store found standard names:
    meta_stdname_set = set()

    # Loop over lines in metadata file object:
    for line in metafile_obj:

        # Split on the first "=" (if any) and require the left-hand side
        # to be exactly "standard_name".  BUG FIX: the previous prefix
        # test (startswith) also matched keys like "standard_name_xyz".
        key, sep, value = line.partition("=")
        if sep and key.strip() == "standard_name":

            # Strip any trailing comment before recording the value:
            value = value.partition("#")[0]

            # Add stripped/trimmed text to the standard name set:
            meta_stdname_set.add(value.strip())

        # End if
    # End for

    return meta_stdname_set
177 |
178 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
179 | #Function to extract standard names in CCPP metadata file
180 | #that are not in a provided set of accepted standard names
181 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
182 |
def missing_metafile_names(metafile, stdname_set):

    """
    Extract all standard names listed in the CCPP metadata file
    <metafile> and return a sorted list of those names that are not
    present in the provided standard-name set.
    """

    # Collect every standard name used in the metadata file:
    with open(metafile, 'r', encoding='utf-8') as mfile:
        used_names = find_metafile_stdnames(mfile)
    # End with

    # Names used in the file but absent from the dictionary, sorted:
    return sorted(used_names - stdname_set)
204 |
205 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
206 | #Function to find the paths to all metadata files within
207 | #a given directory path
208 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
209 |
def find_metadata_files(dir_path):

    """
    Walk through the provided directory and
    return a list of the full paths of all
    CCPP metadata (.meta) files found, skipping
    anything located under a '.git' path.
    """

    #Create new, empty list to store metadata file paths:
    metadata_files = []

    #Walk through provided directory:
    for root, _, files in os.walk(dir_path):
        #Ignore git directories.
        #NOTE: this is a substring check, so any path containing
        #'.git' (e.g. '.github') is skipped as well:
        if '.git' in root:
            continue
        #End if

        #Add all found metadata files to metadata list,
        #including their full path:
        metadata_files.extend(os.path.join(root, mfil)
                              for mfil in files
                              if mfil.endswith('.meta'))
    #End for

    #Return list of metadata files:
    return metadata_files
240 |
241 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
242 | #Function to print a "human-readable" list of all of the
243 | #standard names in the provided CCPP metadata files that
244 | #were not found in the provided standard name dictionary
245 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
246 |
def print_missing_names(missing_names_dict):

    """
    Print each metadata file that contains standard
    names not found in the dictionary, followed by
    an indented list of that file's "missing"
    standard names.
    """

    #Header block, including the date/time the script was run:
    print("\n#######################")
    print("Date/time of when script was run:")
    print(datetime.datetime.now())
    print("#######################")
    print("\nNon-dictionary standard names found in the following"
          " metadata files:")

    #Write one section per metadata file (dict keys
    #are the metadata file paths):
    for meta_path, stdnames in missing_names_dict.items():

        print("\n--------------------------\n")
        print(f"{meta_path}\n")

        #List every missing standard name for this file:
        for stdname in stdnames:
            print(f"  - {stdname}")
        #End for

    #End for

    print("\n#######################")
284 |
285 | #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
286 |
287 | ############
288 | #Main script
289 | ############
290 |
#Parse command-line arguments:
metafile_loc, stdname_xml = parse_arguments()

#Read the standard name dictionary and pull out
#every standard name it defines:
_, stdname_dict_root = read_xml_file(stdname_xml)
std_names = get_dict_stdnames(stdname_dict_root)

#Determine which metadata files to check: a single
#file if one was passed in, every metadata file in
#or under the given location if it is a directory:
if os.path.isfile(metafile_loc):
    meta_files = [metafile_loc]
elif os.path.isdir(metafile_loc):
    meta_files = find_metadata_files(metafile_loc)
else:
    #This is a non-supported input, so raise
    #an error:
    emsg = f"The metafile-loc arg input, '{metafile_loc}'\n"
    emsg += "is neither a file nor a directory,"
    emsg += " so script will end here."
    raise FileNotFoundError(emsg)
#End if

#Map each metadata file to the standard names it
#uses that are absent from the dictionary:
meta_miss_names_dict = OrderedDict()
for meta_file in meta_files:
    missing_stdnames = missing_metafile_names(meta_file, std_names)
    if missing_stdnames:
        meta_miss_names_dict[meta_file] = missing_stdnames
    #End if
#End for

#Report the results:
if meta_miss_names_dict:
    #Print organized, human-readable
    #list of "missing" standard names
    #to the screen, along with the
    #metadata file they are associated
    #with
    print_missing_names(meta_miss_names_dict)
else:
    #Notify user that all standard names
    #exist in the dictionary:
    print("All standard names are in the dictionary!")
#End if


##############
#End of script
--------------------------------------------------------------------------------
/tools/write_standard_name_table.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """
4 | Convert a metadata standard-name XML library file to another format.
5 | """
6 |
7 | # Python library imports
8 | from collections import OrderedDict
9 | import xml.etree.ElementTree as ET
10 | import os.path
11 | import argparse
12 | import sys
13 | import re
14 | import yaml
15 |
16 | ################################################
17 | # Add lib modules to python path
18 | ################################################
19 |
20 | _CURR_DIR = os.path.dirname(os.path.abspath(__file__))
21 | sys.path.append(os.path.join(_CURR_DIR, "lib"))
22 |
23 | #######################################
24 | # Import needed framework python modules
25 | #######################################
26 |
27 | from xml_tools import validate_xml_file, read_xml_file
28 | from xml_tools import find_schema_file, find_schema_version
29 |
30 | #######################################
31 | # Regular expressions
32 | #######################################
33 |
# Matches a 'p' sandwiched between two digits (e.g. '0p55'); used to
# restore a decimal point when building human-readable descriptions.
_REAL_SUBST_RE = re.compile(r"(.*\d)p(\d.*)")

# Matches every character Markdown drops when converting a header into
# an anchor link (anything other than lowercase letters, '_', or '-').
_DROPPED_LINK_CHARS_RE = re.compile(r"[^a-z_-]")
37 |
38 | #######################################
39 | # Custom representer for OrderedDict
40 | #######################################
41 |
def ordered_dict_representer(dumper, data):
    """Represent an OrderedDict as a plain YAML mapping, preserving key order."""
    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    return dumper.represent_mapping(mapping_tag, data.items())
# Register the representer so yaml.dump handles OrderedDict transparently:
yaml.add_representer(OrderedDict, ordered_dict_representer)
45 |
46 | ########################################################################
def convert_text_to_link(text_str):
########################################################################
    """
    Produce the internal document link that Markdown
    would generate for the given header string, by
    applying Markdown's header-to-anchor conversion
    rules to the text.
    """

    # Trim whitespace, lowercase the text, and swap
    # spaces for dashes:
    dashed_str = text_str.strip().lower().replace(" ", "-")

    # Finally, drop every character that is not a
    # lowercase letter, underscore, or dash:
    return _DROPPED_LINK_CHARS_RE.sub("", dashed_str)
71 |
72 | ########################################################################
def standard_name_to_description(prop_dict, context=None):
########################################################################
    """Translate a standard_name to its default description
    Note: This code is copied from the CCPP Framework.
    >>> standard_name_to_description({'standard_name':'cloud_optical_depth_layers_from_0p55mu_to_0p99mu'})
    'Cloud optical depth layers from 0.55mu to 0.99mu'
    >>> standard_name_to_description({'local_name':'foo'}) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    CCPPError: No standard name to convert foo to description
    >>> standard_name_to_description({}) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    CCPPError: No standard name to convert to description
    >>> standard_name_to_description({'local_name':'foo'}, context=ParseContext(linenum=3, filename='foo.F90')) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    CCPPError: No standard name to convert foo to description at foo.F90:3
    >>> standard_name_to_description({}, context=ParseContext(linenum=3, filename='foo.F90')) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    CCPPError: No standard name to convert to description at foo.F90:3
    """
    # NOTE(review): the error path below uses CCPPError and
    # context_string, neither of which appears in this file's visible
    # imports -- confirm they are in scope at runtime (this function was
    # copied from the CCPP Framework, which defines both).
    # We assume that standard_name has been checked for validity
    # Make the first char uppercase and replace each underscore with a space
    if 'standard_name' in prop_dict:
        standard_name = prop_dict['standard_name']
        if standard_name:
            description = standard_name[0].upper() + re.sub("_", " ",
                                                            standard_name[1:])
        else:
            # Empty standard_name string: fall through with an empty
            # description rather than raising.
            description = ''
        # end if
        # Next, substitute a decimal point for the p in [:digit]p[:digit]
        match = _REAL_SUBST_RE.match(description)
        while match is not None:
            description = match.group(1) + '.' + match.group(2)
            match = _REAL_SUBST_RE.match(description)
        # end while
    else:
        description = ''
        if 'local_name' in prop_dict:
            lname = ' {}'.format(prop_dict['local_name'])
        else:
            lname = ''
        # end if
        ctxt = context_string(context)
        emsg = 'No standard name to convert{} to description{}'
        raise CCPPError(emsg.format(lname, ctxt))
    # end if
    return description
120 |
121 | ###############################################################################
122 | def parse_command_line(args, program_description):
123 | ###############################################################################
124 | parser = argparse.ArgumentParser(description=program_description,
125 | formatter_class=argparse.RawTextHelpFormatter)
126 |
127 | parser.add_argument("standard_name_file",
128 | metavar='',
129 | type=str, help="XML file with standard name library")
130 | parser.add_argument("--output-filename", metavar='