├── .github └── dependabot.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG ├── LICENSE.md ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── converters.html ├── datatypes.html ├── filters.html ├── formulas │ ├── eval.html │ ├── functions.html │ ├── index.html │ ├── lexer.html │ ├── parser.html │ └── parsetab.html ├── geo.html ├── index.html ├── input.html ├── model.html ├── scripts.html ├── util.html └── validation.html ├── hxl ├── __init__.py ├── converters.py ├── datatypes.py ├── filters.py ├── formulas │ ├── __init__.py │ ├── eval.py │ ├── functions.py │ ├── lexer.py │ └── parser.py ├── geo.py ├── hxl-default-schema.json ├── input.py ├── model.py ├── scripts.py ├── util.py └── validation.py ├── profile ├── data │ └── unhcr_popstats_export_persons_of_concern_all_data.hxl └── validation-profile.py ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── files ├── mock │ ├── append-source-1.csv │ ├── append-source-2.csv │ ├── append-source-list.csv │ └── taxonomy.csv ├── test_converters │ └── wide-tagging-test.csv ├── test_io │ ├── input-broken.xlsx │ ├── input-fuzzy.csv │ ├── input-invalid.csv │ ├── input-invalid.png │ ├── input-merged.xlsx │ ├── input-multiline.csv │ ├── input-multiple-headers.csv │ ├── input-notag1.html │ ├── input-notag2.html │ ├── input-quality.xls │ ├── input-quality.xlsx │ ├── input-untagged-csv.zip │ ├── input-untagged-objects.json │ ├── input-untagged.csv │ ├── input-untagged.json │ ├── input-valid-csv.zip │ ├── input-valid-json-selector.json │ ├── input-valid-json.txt │ ├── input-valid-latin1.csv │ ├── input-valid-nested.json │ ├── input-valid-objects.json │ ├── input-valid-xlsx.NOEXT │ ├── input-valid.csv │ ├── input-valid.hxl │ ├── input-valid.json │ ├── input-valid.ssv │ ├── input-valid.tsv │ ├── input-valid.xls │ ├── input-valid.xlsx │ ├── input-zip-invalid.zip │ ├── output-valid-objects.json │ ├── output-valid.csv │ └── output-valid.json ├── test_scripts │ ├── add-output-before.csv │ ├── add-output-default.csv │ ├── 
add-output-headers.csv │ ├── append-dataset.csv │ ├── clean-output-headers.csv │ ├── clean-output-lower.csv │ ├── clean-output-noheaders.csv │ ├── clean-output-upper.csv │ ├── clean-output-whitespace-all.csv │ ├── clean-output-whitespace-tags.csv │ ├── count-output-aggregated.csv │ ├── count-output-colspec.csv │ ├── count-output-simple.csv │ ├── cut-output-excludes.csv │ ├── cut-output-includes.csv │ ├── input-date.csv │ ├── input-merge.csv │ ├── input-simple.csv │ ├── input-status.csv │ ├── input-untagged.csv │ ├── input-whitespace.csv │ ├── merge-output-basic.csv │ ├── merge-output-overwrite.csv │ ├── merge-output-replace.csv │ ├── rename-output-header.csv │ ├── rename-output-multiple.csv │ ├── rename-output-single.csv │ ├── select-output-eq.csv │ ├── select-output-ge.csv │ ├── select-output-gt.csv │ ├── select-output-le.csv │ ├── select-output-lt.csv │ ├── select-output-multiple.csv │ ├── select-output-ne.csv │ ├── select-output-nre.csv │ ├── select-output-re.csv │ ├── select-output-reverse.csv │ ├── sort-output-date.csv │ ├── sort-output-default.csv │ ├── sort-output-numeric.csv │ ├── sort-output-reverse.csv │ ├── sort-output-tags.csv │ ├── tag-output-ambiguous.csv │ ├── tag-output-default.csv │ ├── tag-output-full.csv │ ├── tag-output-notsubstrings.csv │ ├── tag-output-partial.csv │ ├── validation-schema-invalid.csv │ └── validation-schema-valid.csv └── test_validation │ └── truthy-schema.json ├── test_converters.py ├── test_datatypes.py ├── test_filters.py ├── test_formulas.py ├── test_geo.py ├── test_input.py ├── test_model.py ├── test_scripts.py └── test_validation.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | open-pull-requests-limit: 10 -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.egg 2 | *.pyc 3 | /.eggs 4 | /.pypirc 5 | /build 6 | /dist 7 | /hxl/formulas/parser.out 8 | /hxl/formulas/parsetab.py 9 | /libhxl.egg-info 10 | /profile 11 | /venv 12 | /venv-test 13 | TAGS 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - '2.7' 5 | - '3.3' 6 | - '2.6' 7 | 8 | script: 9 | - python setup.py test 10 | 11 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include hxl *.json -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # Makefile to automate common tasks 3 | # 4 | # build-venv - (re)build the Python virtual environment if needed 5 | # test - run all unit tests 6 | # test-failed - rerun only failed unit tests 7 | # test-install - test a fresh installation in a temporary venv 8 | # publish-pypi - publish a new release to PyPi 9 | # etags - build an Emacs TAGS file 10 | # api-docs - generate HTML documentation from inline comments 11 | # browse-docs - open API docs in the browser 12 | ######################################################################## 13 | 14 | 15 | # activation script for the Python virtual environment 16 | VENV=venv/bin/activate 17 | 18 | # run unit tests 19 | test: $(VENV) 20 | . $(VENV) && pytest 21 | 22 | # run unit tests 23 | test-failed: $(VENV) 24 | . $(VENV) && pytest --lf 25 | 26 | # alias to (re)build the Python virtual environment 27 | build-venv: $(VENV) 28 | 29 | # (re)build the virtual environment if it's missing, or whenever setup.py changes 30 | $(VENV): setup.py requirements.txt 31 | rm -rf venv 32 | python3 -m venv venv 33 | . 
$(VENV) \ 34 | && pip3 install -r requirements.txt \ 35 | && python setup.py develop \ 36 | && pip install pdoc3 37 | 38 | # do a cold install in a temporary virtual environment and run unit tests 39 | test-install: 40 | rm -rf venv-test 41 | python3 -m venv venv-test 42 | . venv-test/bin/activate \ 43 | && python setup.py install \ 44 | && pytest 45 | rm -rf venv-test # make sure we clean up 46 | 47 | # publish a new release on PyPi 48 | publish-pypi: $(VENV) 49 | rm -rf dist/* 50 | git checkout upstream/prod 51 | git pull upstream prod 52 | . $(VENV) \ 53 | && pip install twine \ 54 | && python setup.py sdist && twine upload dist/* 55 | 56 | # generate API documentation 57 | api-docs: $(VENV) 58 | rm -rf docs/* 59 | . $(VENV) \ 60 | && pdoc3 -o docs/ --html hxl && mv docs/hxl/* docs/ 61 | rmdir docs/hxl/ 62 | 63 | # browse the API docs 64 | browse-docs: 65 | firefox docs/index.html 66 | 67 | # (re)generate emacs TAGS file 68 | etags: 69 | find hxl tests -name '*.py' -o -name '*.csv' \ 70 | | xargs etags 71 | 72 | # end 73 | 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | libhxl-python 2 | ============= 3 | 4 | Python support library for the Humanitarian Exchange Language (HXL) 5 | data standard. The library requires Python 3 (versions prior to 4.6 6 | also supported Python 2.7). 
7 | 8 | **API docs:** https://hxlstandard.github.io/libhxl-python/ (and in the ``docs/`` folder) 9 | 10 | **HXL standard:** http://hxlstandard.org 11 | 12 | ## Quick start 13 | 14 | From the command line (or inside a Python3 virtual environment): 15 | 16 | ``` 17 | $ pip3 install libhxl 18 | ``` 19 | 20 | In your code: 21 | 22 | ``` 23 | import hxl 24 | 25 | url = "https://github.com/HXLStandard/libhxl-python/blob/main/tests/files/test_io/input-valid.csv" 26 | 27 | data = hxl.data(url).with_rows("#sector=WASH").sort("#country") 28 | 29 | for line in data.gen_csv(): 30 | print(line) 31 | ``` 32 | 33 | ## Usage 34 | 35 | ### Reading from a data source 36 | 37 | The _hxl.data()_ function reads HXL from a file object, filename, URL, 38 | or list of arrays and makes it available for processing, much like 39 | ``$()`` in JQuery. The following will read HXLated data from standard input: 40 | 41 | ``` 42 | import sys 43 | import hxl 44 | 45 | dataset = hxl.data(sys.stdin) 46 | ``` 47 | 48 | Most commonly, you will open a dataset via a URL: 49 | 50 | ``` 51 | dataset = hxl.data("https://example.org/dataset.url" 52 | ``` 53 | 54 | To open a local file rather than a URL, use the _allow\_local_ property 55 | of the 56 | [InputOptions](https://hxlstandard.github.io/libhxl-python/input.html#hxl.input.InputOptions) 57 | class: 58 | 59 | ``` 60 | dataset = hxl.data("dataset.xlsx", hxl.InputOptions(allow_local=True)) 61 | ``` 62 | 63 | #### Input caching 64 | 65 | libhxl uses the Python 66 | [requests](http://docs.python-requests.org/en/master/) library for 67 | opening URLs. If you want to enable caching (for example, to avoid 68 | beating up on your source with repeated requests), your code can use 69 | the [requests_cache](https://pypi.python.org/pypi/requests-cache) 70 | plugin, like this: 71 | 72 | import requests_cache 73 | requests_cache.install_cache('demo_cache', expire_after=3600) 74 | 75 | The default caching backend is a sqlite database at the location specied. 
76 | 77 | 78 | ### Filter chains 79 | 80 | You can filters to transform the output, and chain them as 81 | needed. Transformation is lazy, and uses the minimum memory 82 | possible. For example, this command selects only data rows where the 83 | country is "Somalia", sorted by the organisation: 84 | 85 | ``` 86 | transformed = hxl.data(url).with_rows("#country=Somalia").sort("#org") 87 | ``` 88 | 89 | For more on filters see the API documentation for the 90 | [hxl.model.Dataset](https://hxlstandard.github.io/libhxl-python/model.html#hxl.model.Dataset) 91 | class and the 92 | [hxl.filters](https://hxlstandard.github.io/libhxl-python/filters.html) 93 | module. 94 | 95 | 96 | ### Generators 97 | 98 | Generators allow the re-serialising of HXL data, returning something that works like an iterator. Example: 99 | 100 | ``` 101 | for line in hxl.data(url).gen_csv(): 102 | print(line) 103 | ``` 104 | 105 | The following generators are available (you can use the parameters to turn the text headers and HXL tags on or off): 106 | 107 | Generator method | Description 108 | -- | -- 109 | [gen_raw()](https://hxlstandard.github.io/libhxl-python/model.html#hxl.model.Dataset.gen_raw) | Generate arrays of strings, one row at a time. 110 | [gen_csv()](https://hxlstandard.github.io/libhxl-python/model.html#hxl.model.Dataset.gen_csv) | Generate encoded CSV rows, one row at a time. 111 | [gen_json()](https://hxlstandard.github.io/libhxl-python/model.html#hxl.model.Dataset.gen_json) | Generate JSON output, either as rows or as JSON objects with the HXL hashtags as property names. 
112 | 113 | ### Validation 114 | 115 | To validate a HXL dataset against a schema (also in HXL), use the [validate()](https://hxlstandard.github.io/libhxl-python/model.html#hxl.model.Dataset.validate) method at the end of the filter chain: 116 | 117 | ``` 118 | is_valid = hxl.data(url).validate('my-schema.csv') 119 | ``` 120 | 121 | If you don't specify a schema, the library will use a simple, built-in schema: 122 | 123 | ``` 124 | is_valid = hxl.data(url).validate() 125 | ``` 126 | 127 | If you include a callback, you can collect details about the errors and warnings: 128 | 129 | ``` 130 | def my_callback(error_info): 131 | ## error_info is a HXLValidationException 132 | sys.stderr.write(error_info) 133 | 134 | is_valid = hxl.data(url).validate(schema='my-schema.csv', callback=my_callback) 135 | ``` 136 | 137 | For more information on validation, see the API documentation for the 138 | [hxl.validation](https://hxlstandard.github.io/libhxl-python/validation.html) 139 | module and the format documentation for [HXL 140 | schemas](https://github.com/HXLStandard/hxl-proxy/wiki/HXL-schemas). 141 | 142 | 143 | ## Command-line scripts 144 | 145 | The filters are also available as command-line scripts, installed with 146 | the library. For example, 147 | 148 | ``` 149 | $ hxlcount -t country dataset.csv 150 | ``` 151 | 152 | Will perform the same action as 153 | 154 | ``` 155 | import hxl 156 | 157 | hxl.data("dataset.csv", hxl.InputOptions(allow_local=True)).count("country").gen_csv() 158 | ``` 159 | 160 | See the API documentation for the 161 | [hxl.scripts](https://hxlstandard.github.io/libhxl-python/scripts.html) 162 | module for more information about the command-line scripts 163 | available. All scripts have an ``-h`` option that gives usage 164 | information. 165 | 166 | 167 | ## Installation 168 | 169 | This repository includes a standard Python `setup.py` script for 170 | installing the library and scripts (applications) on your system. 
In a 171 | Unix-like operating system, you can install using the following 172 | command: 173 | 174 | ``` 175 | python setup.py install 176 | ``` 177 | 178 | If you don't need to install from source, try simply 179 | 180 | ``` 181 | pip install libhxl 182 | ``` 183 | 184 | Once you've installed, you will be able to include the HXL libraries 185 | from any Python application, and will be able to call scripts like 186 | _hxlvalidate_ from the command line. 187 | 188 | 189 | ## Makefile 190 | 191 | There is also a generic Makefile that automates many tasks, including 192 | setting up a Python virtual environment for testing. The Python3 venv 193 | module is required for most of the targets. 194 | 195 | 196 | ``` 197 | make build-venv 198 | ``` 199 | 200 | Set up a local Python virtual environment for testing, if it doesn't 201 | already exist. Will recreate the virtual environment if setup.py has 202 | changed. 203 | 204 | ``` 205 | make test 206 | ``` 207 | 208 | Set up a virtual environment (if missing) and run all the unit tests 209 | 210 | ``` 211 | make test-install 212 | ``` 213 | 214 | Test a clean installation to verify there are no missing dependencies, 215 | etc. 216 | 217 | ## License 218 | 219 | libhxl-python is released into the Public Domain, and comes with NO 220 | WARRANTY. See [LICENSE.md](./LICENSE.md) for details. 221 | -------------------------------------------------------------------------------- /docs/formulas/eval.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | hxl.formulas.eval API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Module hxl.formulas.eval

23 |
24 |
25 |

Evaluate a formula against a row

26 |
27 | 28 | Expand source code 29 | 30 |
""" Evaluate a formula against a row
 31 | """
 32 | 
 33 | import logging
 34 | import hxl.formulas.parser as p, hxl.formulas.lexer as l
 35 | 
 36 | from hxl.util import logup
 37 | 
 38 | logger = logging.getLogger(__name__)
 39 | 
 40 | def eval(row, formula):
 41 |     """Parse a formula, then return the result of evaluating it against a row.
 42 |     @param row: the HXL row object
 43 |     @param formula: the formula as a string
 44 |     @return: a scalar result
 45 |     """
 46 |     statement = p.parser.parse(formula, lexer=l.lexer)
 47 |     if statement:
 48 |         return statement[0](row, statement[1])
 49 |     else:
 50 |         logup('Cannot parse formula', {"formula": formula}, level='error')
 51 |         logger.error("Cannot parse formula {{ {} }}".format(formula))
 52 |         return "**ERROR**"
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |

Functions

61 |
62 |
63 | def eval(row, formula) 64 |
65 |
66 |

Parse a formula, then return the result of evaluating it against a row. 67 | @param row: the HXL row object 68 | @param formula: the formula as a string 69 | @return: a scalar result

70 |
71 | 72 | Expand source code 73 | 74 |
def eval(row, formula):
    """Parse a formula, then return the result of evaluating it against a row.
    @param row: the HXL row object
    @param formula: the formula as a string
    @return: a scalar result, or the string "**ERROR**" when parsing fails
    """
    statement = p.parser.parse(formula, lexer=l.lexer)
    # guard clause: a falsy parse result means the formula is malformed
    if not statement:
        logup('Cannot parse formula', {"formula": formula}, level='error')
        logger.error("Cannot parse formula {{ {} }}".format(formula))
        return "**ERROR**"
    # the parser yields a (function, arguments) pair on success
    func = statement[0]
    return func(row, statement[1])
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 | 112 |
113 | 116 | 117 | -------------------------------------------------------------------------------- /docs/formulas/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | hxl.formulas API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 | 58 | 80 |
81 | 84 | 85 | -------------------------------------------------------------------------------- /docs/formulas/lexer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | hxl.formulas.lexer API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Module hxl.formulas.lexer

23 |
24 |
25 |
26 | 27 | Expand source code 28 | 29 |
import logging
 30 | import ply.lex as lex, json
 31 | 
 32 | from hxl.util import logup
 33 | 
 34 | logger = logging.getLogger(__name__)
 35 | 
 36 | tokens = (
 37 |     'NAME',
 38 |     'TAGPATTERN',
 39 |     'INT',
 40 |     'FLOAT',
 41 |     'STRING',
 42 |     'PLUS',
 43 |     'MINUS',
 44 |     'TIMES',
 45 |     'DIVIDE',
 46 |     'MODULO',
 47 |     'LPAREN',
 48 |     'RPAREN',
 49 |     'COMMA'
 50 | )
 51 | 
 52 | t_ignore = " \t\r\n"
 53 | 
 54 | # Regular expression rules for simple tokens
 55 | t_NAME = r'[A-Za-z][A-Za-z0-9_]*'
 56 | t_TAGPATTERN = r'\#[A-Za-z][A-Za-z0-9_]*(\s*[+-][A-Za-z][A-Za-z0-9_]*)*[!]?'
 57 | t_PLUS = r'\+'
 58 | t_MINUS = r'-'
 59 | t_TIMES = r'\*'
 60 | t_DIVIDE = r'/'
 61 | t_MODULO = r'%'
 62 | t_LPAREN = r'\('
 63 | t_RPAREN = r'\)'
 64 | t_COMMA = r','
 65 | 
 66 | def t_STRING(t):
 67 |     r'"([^"]|\\.)*"'
 68 |     t.value = json.loads(t.value)
 69 |     return t
 70 | 
 71 | def t_FLOAT(t):
 72 |     r'\d+\.\d+'
 73 |     t.value = float(t.value)
 74 |     return t
 75 | 
 76 | def t_INT(t):
 77 |     r'\d+'
 78 |     t.value = int(t.value)
 79 |     return t
 80 | 
 81 | def t_error(t):
 82 |     logup('Illegal character', {"char": t.value[0]}, level='error')
 83 |     logger.error("Illegal character '%s'", t.value[0])
 84 |     t.lexer.skip(1)
 85 | 
 86 | lexer = lex.lex()
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |

Functions

95 |
96 |
97 | def t_FLOAT(t) 98 |
99 |
100 |

\d+.\d+

101 |
102 | 103 | Expand source code 104 | 105 |
def t_FLOAT(t):
    r'\d+\.\d+'
    # ply lexer rule: a decimal literal such as "12.5".
    # Replace the raw matched text with its numeric value.
    number = float(t.value)
    t.value = number
    return t
109 |
110 |
111 |
112 | def t_INT(t) 113 |
114 |
115 |

\d+

116 |
117 | 118 | Expand source code 119 | 120 |
def t_INT(t):
    r'\d+'
    # ply lexer rule: a run of digits.  Store the token value as an int.
    number = int(t.value)
    t.value = number
    return t
124 |
125 |
126 |
127 | def t_STRING(t) 128 |
129 |
130 |

"([^"]|\.)*"

131 |
132 | 133 | Expand source code 134 | 135 |
def t_STRING(t):
    r'"([^"]|\\.)*"'
    # ply lexer rule: a double-quoted string literal (backslash escapes
    # allowed).  The matched text is valid JSON, so json.loads both strips
    # the surrounding quotes and decodes any escape sequences.
    decoded = json.loads(t.value)
    t.value = decoded
    return t
139 |
140 |
141 |
142 | def t_error(t) 143 |
144 |
145 |
146 |
147 | 148 | Expand source code 149 | 150 |
def t_error(t):
    # ply error hook: log the offending character, then skip past it so
    # scanning can continue with the rest of the input.
    bad = t.value[0]
    logup('Illegal character', {"char": bad}, level='error')
    logger.error("Illegal character '%s'", bad)
    t.lexer.skip(1)
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 | 182 |
183 | 186 | 187 | -------------------------------------------------------------------------------- /docs/formulas/parsetab.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | hxl.formulas.parsetab API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Module hxl.formulas.parsetab

23 |
24 |
25 |
26 | 27 | Expand source code 28 | 29 |
# parsetab.py
 30 | # This file is automatically generated. Do not edit.
 31 | # pylint: disable=W,C,R
 32 | _tabversion = '3.10'
 33 | 
 34 | _lr_method = 'LALR'
 35 | 
 36 | _lr_signature = 'leftPLUSMINUSleftTIMESDIVIDEMODULOrightUMINUSCOMMA DIVIDE FLOAT INT LPAREN MINUS MODULO NAME PLUS RPAREN STRING TAGPATTERN TIMESexpression : INT\n                  | FLOAT\n                  | STRING\n    expression : LPAREN expression RPARENexpression : expression PLUS expressionexpression : expression MINUS expressionexpression : expression TIMES expressionexpression : expression DIVIDE expressionexpression : expression MODULO expressionexpression : MINUS expression %prec UMINUSexpression : TAGPATTERNexpression : NAME LPAREN args RPARENargs : expression COMMA argsargs : expressionargs :'
 37 |     
 38 | _lr_action_items = {'INT':([0,5,6,9,10,11,12,13,16,26,],[2,2,2,2,2,2,2,2,2,2,]),'FLOAT':([0,5,6,9,10,11,12,13,16,26,],[3,3,3,3,3,3,3,3,3,3,]),'STRING':([0,5,6,9,10,11,12,13,16,26,],[4,4,4,4,4,4,4,4,4,4,]),'LPAREN':([0,5,6,8,9,10,11,12,13,16,26,],[5,5,5,16,5,5,5,5,5,5,5,]),'MINUS':([0,1,2,3,4,5,6,7,9,10,11,12,13,14,15,16,17,18,19,20,21,22,24,25,26,],[6,10,-1,-2,-3,6,6,-11,6,6,6,6,6,10,-10,6,-5,-6,-7,-8,-9,-4,10,-12,6,]),'TAGPATTERN':([0,5,6,9,10,11,12,13,16,26,],[7,7,7,7,7,7,7,7,7,7,]),'NAME':([0,5,6,9,10,11,12,13,16,26,],[8,8,8,8,8,8,8,8,8,8,]),'$end':([1,2,3,4,7,15,17,18,19,20,21,22,25,],[0,-1,-2,-3,-11,-10,-5,-6,-7,-8,-9,-4,-12,]),'PLUS':([1,2,3,4,7,14,15,17,18,19,20,21,22,24,25,],[9,-1,-2,-3,-11,9,-10,-5,-6,-7,-8,-9,-4,9,-12,]),'TIMES':([1,2,3,4,7,14,15,17,18,19,20,21,22,24,25,],[11,-1,-2,-3,-11,11,-10,11,11,-7,-8,-9,-4,11,-12,]),'DIVIDE':([1,2,3,4,7,14,15,17,18,19,20,21,22,24,25,],[12,-1,-2,-3,-11,12,-10,12,12,-7,-8,-9,-4,12,-12,]),'MODULO':([1,2,3,4,7,14,15,17,18,19,20,21,22,24,25,],[13,-1,-2,-3,-11,13,-10,13,13,-7,-8,-9,-4,13,-12,]),'RPAREN':([2,3,4,7,14,15,16,17,18,19,20,21,22,23,24,25,26,27,],[-1,-2,-3,-11,22,-10,-15,-5,-6,-7,-8,-9,-4,25,-14,-12,-15,-13,]),'COMMA':([2,3,4,7,15,17,18,19,20,21,22,24,25,],[-1,-2,-3,-11,-10,-5,-6,-7,-8,-9,-4,26,-12,]),}
 39 | 
 40 | _lr_action = {}
 41 | for _k, _v in _lr_action_items.items():
 42 |    for _x,_y in zip(_v[0],_v[1]):
 43 |       if not _x in _lr_action:  _lr_action[_x] = {}
 44 |       _lr_action[_x][_k] = _y
 45 | del _lr_action_items
 46 | 
 47 | _lr_goto_items = {'expression':([0,5,6,9,10,11,12,13,16,26,],[1,14,15,17,18,19,20,21,24,24,]),'args':([16,26,],[23,27,]),}
 48 | 
 49 | _lr_goto = {}
 50 | for _k, _v in _lr_goto_items.items():
 51 |    for _x, _y in zip(_v[0], _v[1]):
 52 |        if not _x in _lr_goto: _lr_goto[_x] = {}
 53 |        _lr_goto[_x][_k] = _y
 54 | del _lr_goto_items
 55 | _lr_productions = [
 56 |   ("S' -> expression","S'",1,None,None,None),
 57 |   ('expression -> INT','expression',1,'p_expression_const','parser.py',17),
 58 |   ('expression -> FLOAT','expression',1,'p_expression_const','parser.py',18),
 59 |   ('expression -> STRING','expression',1,'p_expression_const','parser.py',19),
 60 |   ('expression -> LPAREN expression RPAREN','expression',3,'p_expression_group','parser.py',24),
 61 |   ('expression -> expression PLUS expression','expression',3,'p_expression_plus','parser.py',28),
 62 |   ('expression -> expression MINUS expression','expression',3,'p_expression_minus','parser.py',32),
 63 |   ('expression -> expression TIMES expression','expression',3,'p_expression_times','parser.py',36),
 64 |   ('expression -> expression DIVIDE expression','expression',3,'p_expression_divide','parser.py',40),
 65 |   ('expression -> expression MODULO expression','expression',3,'p_expression_modulo','parser.py',44),
 66 |   ('expression -> MINUS expression','expression',2,'p_expression_uminus','parser.py',48),
 67 |   ('expression -> TAGPATTERN','expression',1,'p_expression_tagpattern','parser.py',52),
 68 |   ('expression -> NAME LPAREN args RPAREN','expression',4,'p_expression_function','parser.py',56),
 69 |   ('args -> expression COMMA args','args',3,'p_args_multiple','parser.py',60),
 70 |   ('args -> expression','args',1,'p_args_single','parser.py',64),
 71 |   ('args -> <empty>','args',0,'p_args_empty','parser.py',68),
 72 | ]
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 | 97 |
98 | 101 | 102 | -------------------------------------------------------------------------------- /docs/util.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | hxl.util API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Module hxl.util

23 |
24 |
25 |

Other misc utilities

26 |
27 | 28 | Expand source code 29 | 30 |
""" Other misc utilities
 31 | """
 32 | 
 33 | import logging
 34 | import sys
 35 | import structlog
 36 | 
 37 | def logup(msg, props={}, level="info"):
 38 |     """
 39 |     Adds the function name on the fly for the log
 40 | 
 41 |     Args:
 42 |         msg: the actual log message
 43 |         props: additional properties for the log
 44 | 
 45 |     """
 46 |     input_logger = structlog.wrap_logger(logging.getLogger('hxl.REMOTE_ACCESS'))
 47 |     props['function'] = sys._getframe(1).f_code.co_name
 48 |     levels = {
 49 |         "critical": 50,
 50 |         "error": 40,
 51 |         "warning": 30,
 52 |         "info": 20,
 53 |         "debug": 10
 54 |     }
 55 |     input_logger.log(level=levels[level], event=msg, **props)
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |

Functions

64 |
65 |
66 | def logup(msg, props={}, level='info') 67 |
68 |
69 |

Adds the function name on the fly for the log

70 |

Args

71 |
72 |
msg
73 |
the actual log message
74 |
props
75 |
additional properties for the log
76 |
77 |
78 | 79 | Expand source code 80 | 81 |
def logup(msg, props=None, level="info"):
    """
    Adds the function name on the fly for the log

    Args:
        msg: the actual log message
        props: additional properties for the log (dict; never mutated)
        level: log level name ("critical", "error", "warning", "info" or "debug")

    """
    input_logger = structlog.wrap_logger(logging.getLogger('hxl.REMOTE_ACCESS'))
    # Copy before adding keys: the original props={} default was a shared
    # mutable default argument, and writing props['function'] below also
    # mutated any dict the caller passed in.
    props = dict(props) if props else {}
    # record the *caller's* function name (one stack frame up)
    props['function'] = sys._getframe(1).f_code.co_name
    levels = {
        "critical": 50,
        "error": 40,
        "warning": 30,
        "info": 20,
        "debug": 10
    }
    # fall back to INFO for unknown level names instead of raising KeyError
    input_logger.log(level=levels.get(level, 20), event=msg, **props)
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 | 125 |
126 | 129 | 130 | -------------------------------------------------------------------------------- /hxl/__init__.py: -------------------------------------------------------------------------------- 1 | """Support library for the Humanitarian Exchange Language (HXL), version 1.1. 2 | 3 | This library provides support for parsing, validating, cleaning, and 4 | transforming humanitarian datasets that follow the [HXL 5 | standard](https://hxlstandard.org). Its use will be familiar to 6 | developers who have worked with libraries like 7 | [JQuery](https://jquery.com). 8 | 9 | ### Example 10 | 11 | ``` 12 | import hxl 13 | data = hxl.data('data.xlsx', True).with_rows('org=UNICEF').without_columns('contact').count('country') 14 | ``` 15 | 16 | This two-line script performs the following actions: 17 | 18 | 1. Load and parse the spreadsheet ``data.xlsx`` (the library can 19 | also load from any URL, and understands how to read Google 20 | spreadsheets or [CKAN](http://ckan.org) resources). 21 | 22 | 2. Filter out all rows where the value "UNICEF" doesn't appear under 23 | the ``#org`` (organisation) hashtag. 24 | 25 | 3. Strip out personally-identifiable information by removing all 26 | columns with the ``#contact`` hashtag (e.g. ``#contact+name`` or 27 | ``#contact+phone`` or ``#contact+email``). 28 | 29 | 4. Produce a report showing the number of times each unique 30 | ``#country`` appears in the resulting sheet (e.g. to count the 31 | number of activities being conducted by UNICEF in each country). 32 | 33 | ### Command-line scripts 34 | 35 | The various filters are also available 36 | as command-line scripts, so you could perform the same actions as 37 | above in a shell script like this: 38 | 39 | ``` 40 | $ cat data.xlsx | hxlselect -q 'org=UNICEF' | hxlcut -x contact | hxlcount -t country 41 | ``` 42 | 43 | For more information about scripts, see the documentation for 44 | `hxl.scripts`, or invoke any script with the ``-h`` option. 
45 | 46 | ### Imports 47 | 48 | Several identifiers are imported into this top-level package for 49 | typing convenience, including `hxl.model.TagPattern`, 50 | `hxl.model.Dataset`, `hxl.model.Column`, `hxl.model.Row`, 51 | `hxl.model.RowQuery`, `hxl.input.data`, `hxl.input.tagger`, 52 | `hxl.input.HXLParseException`, `hxl.input.write_hxl`, 53 | `hxl.input.make_input`, `hxl.input.InputOptions`, 54 | `hxl.input.from_spec`, `hxl.validation.schema`, 55 | `hxl.validation.validate`, and 56 | `hxl.validation.HXLValidationException`. 57 | 58 | ### Next steps 59 | 60 | To get started, read the documentation for the `hxl.input.data` function and 61 | the `hxl.model.Dataset` class. 62 | 63 | ### About this module 64 | 65 | **Author:** David Megginson 66 | 67 | **Organisation:** UN OCHA 68 | 69 | **License:** Public Domain 70 | 71 | **Started:** Started August 2014 72 | 73 | **GitHub:** https://github.com/HXLStandard/libhxl-python 74 | 75 | **PyPi:** https://pypi.org/project/libhxl/ 76 | 77 | """ 78 | 79 | import sys 80 | 81 | if sys.version_info < (3,): 82 | raise RuntimeError("libhxl requires Python 3 or higher") 83 | 84 | __version__="5.2.2" 85 | """Module version number 86 | see https://www.python.org/dev/peps/pep-0396/ 87 | """ 88 | 89 | # Flatten out common items for easier access 90 | 91 | class HXLException(Exception): 92 | """Base class for all HXL-related exceptions.""" 93 | 94 | def __init__(self, message, data={}): 95 | """Create a new HXL exception. 
class Tagger(hxl.input.AbstractInput):
    """Add HXL hashtags to a non-HXL datasource on the fly.

    Example:
    ```
    input = hxl.input.make_input(url_or_filename)
    specs = [('Cluster', '#sector'), ('Organi', '#org'), ('province', '#adm1+es')]
    dataset = hxl.converters.Tagger(input, specs)
    ```

    The more-common way to invoke the tagger is through the
    ``hxl.input.tagger()`` function:

    ```
    dataset = hxl.input.tagger(url_or_filename, specs)
    ```

    """

    TAG_SCAN_ROWS = 25
    """Number of leading rows to scan for a header row matching the specs."""

    def __init__(self, input, specs=None, default_tag=None, match_all=False):
        """Construct a new Tagger object.

        The input spec is a list of tuples, where the first item is a
        substring to match (case-/space-/punctuation-insensitive), and
        the second item is the HXL tag spec to use.

        Example:
        ```
        spec = [
            ['Cluster', '#sector'],
            ['Organisation', '#org'],
            ['Province', '#adm1+es']
        ]
        ```

        Args:
            input (hxl.input.AbstractInput): an input source that can yield rows of values (see ``hxl.input.make_input``).
            specs (dict): the input specs, as described above (default: [])
            match_all (bool): if True, require that the full header string match; otherwise, match substrings (default: False)
            default_tag (str): default tagspec to use for any column without a match
        """
        # BUG FIX: the original used a mutable default argument (specs=[]).
        if specs is None:
            specs = []
        if isinstance(specs, dict):
            # convert to list of tuples if needed
            specs = [(key, specs[key]) for key in specs]
        # normalise the header substrings once, up front
        self.specs = [(hxl.datatypes.normalise_string(spec[0]), spec[1]) for spec in specs]
        self.default_tag = default_tag
        self.match_all = match_all
        self.input = iter(input)
        self._cache = []        # rows read ahead while searching for headers
        self._found_tags = False

    def __next__(self):
        """Return the next line of input (including the new tags)."""
        if not self._found_tags:
            # Search the first TAG_SCAN_ROWS rows for a match.
            if self._add_tags():
                self._found_tags = True
            else:
                # if no match, throw an exception
                raise hxl.HXLException("Tagging failed")
        if len(self._cache) > 0:
            # read from the cache, first
            return self._cache.pop(0)
        else:
            return next(self.input)

    def _add_tags(self):
        """Look for headers in the first TAG_SCAN_ROWS rows of data.
        @return: True if headers were found matching the tagging specs; False otherwise.
        """
        for n in range(self.TAG_SCAN_ROWS):
            raw_row = next(self.input)
            if raw_row is None:
                break
            self._cache.append(raw_row)
            tag_row = self._try_tag_row(raw_row)
            if tag_row:
                # insert the synthesised hashtag row right after the headers
                self._cache.append(tag_row)
                return True
        return False

    def _try_tag_row(self, raw_row):
        """See if we can match a potential header row with the spec headers.
        @param raw_row: the row to check
        @return: the row of hashtag specs if successful, or None otherwise.
        """
        tags = []
        tag_count = 0
        for index, value in enumerate(raw_row):
            value = hxl.datatypes.normalise_string(value)
            for spec in self.specs:
                if self._check_header(spec[0], value):
                    tags.append(spec[1])
                    tag_count += 1
                    break
            else:
                # run only if nothing found (for/else)
                tags.append('')
        # require that at least half of the specs matched something
        if tag_count > 0 and tag_count/float(len(self.specs)) >= 0.5:
            if self.default_tag:
                tags = [tag or self.default_tag for tag in tags]
            return tags
        else:
            return None

    def _check_header(self, spec, header):
        """Check if an individual header matches a spec for tagging.
        Assumes that both the spec and the header have already been
        case- and whitespace-normalised. If self.match_all is True,
        then the spec must match the header completely; otherwise, it
        needs to match only a substring.
        @param spec: the spec to match
        @param header: the header to test
        @return True if there's a match; False otherwise
        """
        if self.match_all:
            return (spec == header)
        else:
            return (spec in header)

    # this class is its own iterator
    def __iter__(self):
        return self

    _SPEC_PATTERN = r'^(.+)(#{token}([+]{token})*)$'.format(token=hxl.datatypes.TOKEN_PATTERN)
    """Regular-expression pattern for matching a tagging specification as a string"""

    @staticmethod
    def parse_spec(s):
        """Parse a JSON-like tagger spec

        The string is in the format "HEADER TEXT#hashtag+attributes"

        Example:
        ```
        spec = hxl.converters.Tagger.parse_spec("Organisation name#org+name")
        ```

        Used only by the command-line tools.

        Args:
            s (str): the string representing a tagging specification

        Returns:
            hxl.model.Column: the parsed specification as a column object (header, hashtags, and attributes)

        Raises:
            hxl.HXLException: if there is an error parsing the spec

        """
        result = re.match(Tagger._SPEC_PATTERN, s)
        if result:
            return (result.group(1), hxl.model.Column.parse(result.group(2), use_exception=True).display_tag)
        else:
            # BUG FIX: the original raised HXLFilterException, a name that is
            # neither defined nor imported in this module, so a bad spec
            # produced a NameError instead of a useful error.  Raise the
            # package's base exception instead.
            raise hxl.HXLException("Bad tagging spec: " + s)

    @staticmethod
    def _load(input, spec):
        """Create a tagger from a dict spec.

        Example:
        ```
        {
            "match_all": false,
            "default_tag": "#affected+label",
            "specs": [
                ["district", "#adm1+name"],
                ["p-code", "#adm1+code+v_pcode"],
                ["organi", "#org+name"]
            ]
        }
        ```

        """
        return Tagger(
            input=input,
            specs=spec.get('specs', []),
            default_tag=spec.get('default_tag', None),
            match_all=spec.get('match_all', False)
        )
37 | """ 38 | 39 | _WHITESPACE_PATTERN = re.compile(r'\s+', re.MULTILINE) 40 | 41 | _ISO_DATE_PATTERN = re.compile( 42 | r'^(?P[12]\d\d\d)(?:Q(?P[1-4])|W(?P\d\d?)|-(?P\d\d?)(?:-(?P\d\d?))?)?$', 43 | re.IGNORECASE 44 | ) 45 | 46 | _SQL_DATETIME_PATTERN = re.compile( 47 | r'^(?P[12]\d\d\d)-(?P\d\d?)-(?P\d\d?) \d\d?:\d\d?:\d\d?(?P)?(?P)?$' 48 | ) 49 | 50 | _DEFAULT_DATE_1 = datetime.datetime(2015, 1, 1) 51 | 52 | _DEFAULT_DATE_2 = datetime.datetime(2016, 3, 3) 53 | 54 | 55 | 56 | ######################################################################## 57 | # Functions 58 | ######################################################################## 59 | 60 | def normalise(value, col=None, dayfirst=True): 61 | """Intelligently normalise a value, optionally using the HXL hashtag and attributes for hints 62 | 63 | Attempt to guess the value's type using duck typing and 64 | (optionally) hints from the HXL hashtag, then product a string 65 | containing a standard representation of a date or number (if 66 | appropriate), or a string with whitespace normalised. 67 | 68 | Args: 69 | value: the value to convert to a normalised string 70 | col (hxl.model.Column): an optional Column object associated with the string (for hints) 71 | dayfirst (bool): hint for whether to default to DD-MM-YYYY or MM-DD-YYY when ambiguous. 72 | 73 | Returns: 74 | str: A normalised string version of the value provided. 
75 | 76 | """ 77 | # TODO add lat/lon 78 | 79 | if col and col.tag == '#date': 80 | try: 81 | return normalise_date(value, dayfirst=dayfirst) 82 | except ValueError: 83 | pass 84 | 85 | # fall through 86 | try: 87 | return normalise_number(value) 88 | except ValueError: 89 | return normalise_string(value) 90 | 91 | 92 | def typeof(value, col=None): 93 | """Use duck typing and HXL hinting to guess of a value 94 | 95 | Args: 96 | value: the value to check 97 | col (hxl.model.Column): an optional Column object for hinting (via the hashtag and attributes) 98 | 99 | Returns: 100 | str: one of the strings "date", "number", "empty", or "string" 101 | 102 | """ 103 | if col and col.tag == '#date' and is_date(value): 104 | return 'date' 105 | elif is_number(value): 106 | return 'number' 107 | elif is_empty(value): 108 | return 'empty' 109 | else: 110 | return 'string' 111 | 112 | 113 | def flatten(value, use_json=True, separator=" | "): 114 | """Flatten potential lists and dictionaries 115 | 116 | If use_json is false, then remove hierarchies, and create a single list 117 | separated with " | ", and will use dict keys rather than values. 
118 | 119 | Args: 120 | value: the value to flatten (may be a list) 121 | use_json (bool): if True (default), encode top-level lists as JSON 122 | separator (str): the string to use as a separator, if use_json is false 123 | 124 | Returns: 125 | str: a string version of the value 126 | 127 | """ 128 | # keep it simple for now 129 | if value is None: 130 | return '' 131 | elif is_list(value) or is_dict(value): 132 | if use_json: 133 | return json.dumps(value) 134 | else: 135 | return " | ".join([flatten(item, False) for item in value]) 136 | else: 137 | return str(value) 138 | 139 | 140 | def is_truthy(value): 141 | """Loosely check for a boolean-type true value 142 | 143 | Accepts values such as "1", "yes", "t", "true", etc 144 | 145 | Args: 146 | value: the value to test 147 | 148 | Returns: 149 | bool: True if the value appears truthy 150 | 151 | """ 152 | return normalise_string(value) in ['y', 'yes', 't', 'true', '1'] 153 | 154 | 155 | def is_empty(value): 156 | """Test for a functionally-empty value. 157 | 158 | None, empty string, or whitespace only counts as empty; anything else doesn't. 159 | 160 | Args: 161 | value: value to test 162 | 163 | Returns: 164 | bool: True if the value is functionally empty 165 | 166 | """ 167 | return (value is None or value == '' or str(value).isspace()) 168 | 169 | 170 | def is_string(value): 171 | """Test if a value is already a string 172 | 173 | Looks for an actual string data type. 174 | 175 | Args: 176 | value: the value to test 177 | 178 | Returns: 179 | bool: True if the value is a string type. 180 | 181 | """ 182 | return isinstance(value, six.string_types) 183 | 184 | 185 | def is_token(value): 186 | """Test if a value is a valid HXL token 187 | 188 | A token is the string that may appear after "#" for a hashtag, or 189 | "+" for an attribute. It must begin with a letter (A-Z, a-z), 190 | followed by letters, numbers, or underscore ("_"). 
def is_token(value):
    """Test if a value is a valid HXL token

    A token is the string that may appear after "#" for a hashtag, or
    "+" for an attribute. It must begin with a letter (A-Z, a-z),
    followed by letters, numbers, or underscore ("_"). Internal
    spaces, accented/non-Roman characters, and space or other
    punctuation are not allowed.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a token

    """
    if not is_string(value):
        return False
    return re.fullmatch(TOKEN_PATTERN, value)


def normalise_space(value):
    """Normalise whitespace only in a string

    Converts the input to a string, strips leading/trailing whitespace,
    and collapses every internal run of whitespace (including line
    breaks) to a single space character.

    Note: this does not perform other normalisations (date, etc), but
    simply calls the str() function on the value provided.

    Args:
        value: the value to normalise

    Returns:
        str: a string representation of the original value, with whitespace normalised.

    """
    if is_empty(value):
        return ''
    collapsed = str(value).strip().replace("\n", " ")
    return _WHITESPACE_PATTERN.sub(' ', collapsed)


def normalise_string(value):
    """Normalise a string.

    Remove all leading and trailing whitespace. Convert to lower
    case. Replace all internal whitespace (including lineends) with a
    single space. Replace None with ''.

    The input value will be forced to a string using str()

    Args:
        value: the string to normalise

    Returns:
        str: the normalised string

    """
    text = '' if value is None else str(value)
    return normalise_space(unidecode.unidecode(text)).lower()
def is_number(value):
    """By duck typing, test if a value contains something recognisable as a number.

    Args:
        value: the value (string, int, float, etc) to test

    Returns:
        bool: True if usable as a number (via normalise_number())

    """
    try:
        float(value)
        return True
    except (TypeError, ValueError):
        # narrowed from a bare "except:", which also swallowed
        # KeyboardInterrupt/SystemExit; float() raises only these here
        return False


def normalise_number(value):
    """Attempt to convert a value to a number.

    Will convert to int type if it has no decimal places.

    Args:
        value: the value (string, int, float, etc) to convert.

    Returns:
        int: an integer value if there are no decimal places
        float: a floating point value if there were decimal places

    Raises:
        ValueError: if the value cannot be converted

    """
    try:
        n = float(value)
        if n == int(n):
            return int(n)
        else:
            return n
    except (TypeError, ValueError, OverflowError):
        # narrowed from a bare "except:"; OverflowError covers
        # int(float('inf')), ValueError covers int(float('nan'))
        raise ValueError("Cannot convert to number: {}".format(value))


def is_date(value):
    """Test if a value contains something recognisable as a date.

    Args:
        value: the value (string, etc) to test

    Returns:
        True if usable as a date

    """
    try:
        normalise_date(value)
        return True
    except ValueError:
        return False
def normalise_date(value, dayfirst=True):
    """Normalise a string as a date.

    This function will take a variety of different date formats and
    attempt to convert them to an ISO 8601 date, such as
    "2020-06-01". It also will use a non-ISO format for quarter years,
    such as "2020Q2".

    Args:
        value: the value to normalise as a date
        dayfirst (bool): if the date is ambiguous, assume the day comes before the month

    Returns:
        str: the date in ISO 8601 format or the extended quarters syntax

    Raises:
        ValueError: if the value cannot be parsed as a date

    """

    def make_date_string(year, quarter=None, month=None, week=None, day=None):
        # Render parsed components; precedence is quarter > week > month(/day) > bare year.
        if quarter:
            # *not* real ISO 8601
            quarter = int(quarter)
            if quarter < 1 or quarter > 4:
                raise ValueError("Illegal Quarter number: {}".format(quarter))
            return '{:04d}Q{:01d}'.format(int(year), int(quarter))
        elif week:
            week = int(week)
            if week < 1 or week > 53:
                raise ValueError("Illegal week number: {}".format(week))
            return '{:04d}W{:02d}'.format(int(year), int(week))
        elif month:
            month = int(month)
            if month < 1 or month > 12:
                raise ValueError("Illegal month number: {}".format(month))
            if day:
                day = int(day)
                # rough per-month day validation; allows Feb 29 in any year
                if day < 1 or day > 31 or (month in [4, 6, 9, 11] and day > 30) or (month==2 and day>29):
                    raise ValueError("Illegal day {} for month {}".format(day, month))
                return '{:04d}-{:02d}-{:02d}'.format(int(year), int(month), int(day))
            else:
                return '{:04d}-{:02d}'.format(int(year), int(month))
        else:
            return '{:04d}'.format(int(year))

    # If it's a positive integer, try a quick conversion to days or seconds since epoch
    try:
        interval = int(value)
        if interval > 100000: # assume seconds for a big number
            # NOTE(review): fromtimestamp() uses the local timezone, unlike the
            # UTC handling in the numeric-string branch below — confirm intended.
            d = datetime.datetime.fromtimestamp(interval)
            return d.strftime("%Y-%m-%d")
        elif interval >= 2200: # assume days (cut out for years)
            d = datetime.datetime(1970, 1, 1) + datetime.timedelta(days=interval-1)
            return d.strftime("%Y-%m-%d")
    except (ValueError, TypeError,):
        pass

    # First, try our quick ISO date pattern, extended to support quarter notation
    value = normalise_space(value)
    result = _ISO_DATE_PATTERN.match(value)
    if not result:
        result = _SQL_DATETIME_PATTERN.match(value)
    if result:
        # both patterns define all five named groups, so .group() is safe here
        return make_date_string(
            result.group('year'),
            quarter=result.group('quarter'),
            month=result.group('month'),
            week=result.group('week'),
            day=result.group('day')
        )

    # Next, check for a timestamp, which will crash the datetime module
    if value.isnumeric() and len(value) >= 10:
        if len(value) >= 16:
            timestamp = int(value) / 1000000 # nanoseconds
        # NOTE(review): this second test also fires for len >= 16, overwriting
        # the nanosecond branch above — probably intended as "elif"; confirm.
        # NOTE(review): the int() fast-path above normally returns first for
        # purely numeric strings, so this branch may be largely unreachable.
        if len(value) >= 13:
            timestamp = int(value) / 1000 # milliseconds
        else:
            timestamp = int(value) # seconds
        # NOTE(review): utcfromtimestamp() is deprecated as of Python 3.12.
        d = datetime.datetime.utcfromtimestamp(timestamp)
        return d.date().isoformat()

    # revert to full date parsing
    # we parse the date twice, to detect any default values Python might have filled in:
    # a component that differs between the two parses came from the default, not the input
    date1 = dateutil.parser.parse(value, default=_DEFAULT_DATE_1, dayfirst=dayfirst)
    date2 = dateutil.parser.parse(value, default=_DEFAULT_DATE_2, dayfirst=dayfirst)
    day = date1.day if date1.day==date2.day else None
    month = date1.month if date1.month==date2.month else None
    year = date1.year if date1.year==date2.year else None

    # do some quick validation
    if year is None:
        if month is not None:
            # month without year: assume the current year
            year = datetime.datetime.now().year
        else:
            raise ValueError("Will not provide default year unless month is present: {}".format(value))
    if month is None and day is not None:
        raise ValueError("Will not provide default month: {}".format(value))

    return make_date_string(year=year, month=month, day=day)
def is_dict(value):
    """Test if a value is a Python dict.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a Python dict or similar map.

    """
    return isinstance(value, collections.abc.Mapping)


def is_list(value):
    """Test if a value is a Python sequence (other than a string)

    Args:
        value: the value to test

    Returns:
        bool: True if the values is a non-string sequence.

    """
    # six.string_types is just (str,) on Python 3, which this package
    # requires; test against str directly instead of the py2-compat shim.
    return isinstance(value, collections.abc.Sequence) and not isinstance(value, str)
def eval(row, formula):
    """Parse a formula, then return the result of evaluating it against a row.
    @param row: the HXL row object
    @param formula: the formula as a string
    @return: a scalar result
    """
    parsed = p.parser.parse(formula, lexer=l.lexer)
    if not parsed:
        logup('Cannot parse formula', {"formula": formula}, level='error')
        logger.error("Cannot parse formula {{ {} }}".format(formula))
        return "**ERROR**"
    op, op_args = parsed[0], parsed[1]
    return op(row, op_args)


#
# Operators (not directly callable as functions, but see below)
#

def const(row, args, multiple=False):
    """A constant value (returns itself).
    """
    return args[0]

def tagref(row, args):
    """A single tag pattern standing alone.
    @param row: the HXL data row
    @param args: the arguments parsed
    """
    return row.get(args[0])

def add(row, args, multiple=False):
    """An addition statement
    X + Y
    @param row: the HXL data row
    @param args: the arguments parsed
    @param multiple: if true, allow tag patterns to expand to multiple values (used only for function form, not operator form)
    @returns: the sum of the arguments
    """
    return sum(_num(term) for term in _deref(row, args, multiple))

def subtract(row, args, multiple=False):
    """A subtraction statement
    X - Y
    @param row: the HXL data row
    @param args: the arguments parsed
    @param multiple: if true, allow tag patterns to expand to multiple values (used only for function form, not operator form)
    @returns: the result of subtracting all of the following arguments from the first one
    """
    values = _deref(row, args, multiple)
    if not values:
        return 0
    total = _num(values[0])
    for term in values[1:]:
        total -= _num(term)
    return total

def multiply(row, args, multiple=False):
    """A multiplication statement
    X * Y
    @param row: the HXL data row
    @param args: the arguments parsed
    @param multiple: if true, allow tag patterns to expand to multiple values (used only for function form, not operator form)
    @returns: the product of the arguments
    """
    values = _deref(row, args, multiple)
    if not values:
        return 0
    product = _num(values[0])
    for term in values[1:]:
        product *= _num(term)
    return product
def divide(row, args, multiple=False):
    """A division statement
    X / Y
    @param row: the HXL data row
    @param args: the arguments parsed
    @param multiple: if true, allow tag patterns to expand to multiple values (used only for function form, not operator form)
    @returns: the result of dividing the first argument by all of the following ones, in order.
    """
    args = _deref(row, args, multiple)
    result = _num(args[0]) if len(args) > 0 else 0
    for arg in args[1:]:
        v = _num(arg) # avoid DIV0
        if v:
            result = result / v
        else:
            # division by zero: bail out with a marker value
            return 'NaN'
    return result

def modulo(row, args, multiple=False):
    """A modulo division statement
    X % Y
    @param row: the HXL data row
    @param args: the arguments parsed
    @param multiple: if true, allow tag patterns to expand to multiple values (used only for function form, not operator form)
    @returns: the remainder from dividing the first argument by all of the following ones, in order.
    """
    # DOC FIX: the original docstring showed "X / Y" for the modulo operator.
    args = _deref(row, args, multiple)
    result = _num(args[0]) if len(args) > 0 else 0
    for arg in args[1:]:
        v = _num(arg) # avoid DIV0 (a zero divisor is skipped, not an error)
        if v:
            result = result % v
    return result


#
# User-callable functions
#

def function(row, args):
    """Execute a named function
    function(arg, arg...)
    @param row: the HXL data row
    @param args: the arguments parsed (the first one is the function name)
    @returns: the result of executing the function on the arguments
    """
    f = FUNCTIONS.get(args[0])
    if f:
        # user-invoked functions always allow multiple tag-pattern expansion
        return f(row, args[1:], True)
    else:
        logup('Unknown function', {"function": args[0]}, level='error')
        logger.error("Unknown function %s", args[0])
        return ''
def do_min(row, args, multiple=True):
    """Find the minimum value in the list.
    If they're all numbers (or empty), use numeric comparison.
    Otherwise, use lexical comparison (case- and space-insensitive)
    @param row: the HXL data row
    @param args: the function arguments (name removed from start)
    @returns: the minimum value
    """

    values = _deref(row, args, multiple)

    # first, try a numbery comparison
    try:
        min_value = None
        for value in values:
            if not hxl.datatypes.is_empty(value):
                value = hxl.datatypes.normalise_number(value)
                if min_value is None or min_value > value:
                    min_value = value
        return min_value
    # if that fails, revert to lexical
    # (narrowed from a bare "except:"; normalise_number raises ValueError,
    # and mixed-type comparison raises TypeError)
    except (TypeError, ValueError):
        min_value = None
        min_value_norm = None
        for value in values:
            if not hxl.datatypes.is_empty(value):
                norm = hxl.datatypes.normalise_string(value)
                if min_value_norm is None or norm < min_value_norm:
                    min_value_norm = norm
                    min_value = value
        return min_value

def do_max(row, args, multiple=True):
    """Find the maximum value in the list.
    If they're all numbers (or empty), use numeric comparison.
    Otherwise, use lexical comparison (case- and space-insensitive)
    @param row: the HXL data row
    @param args: the function arguments (name removed from start)
    @returns: the maximum value
    """

    values = _deref(row, args, multiple)

    # first, try a numbery comparison
    try:
        max_value = None
        for value in values:
            if not hxl.datatypes.is_empty(value):
                value = hxl.datatypes.normalise_number(value)
                if max_value is None or max_value < value:
                    max_value = value
        return max_value
    # if that fails, revert to lexical
    # (narrowed from a bare "except:"; see do_min)
    except (TypeError, ValueError):
        max_value = None
        max_value_norm = None
        for value in values:
            if not hxl.datatypes.is_empty(value):
                norm = hxl.datatypes.normalise_string(value)
                if max_value_norm is None or norm > max_value_norm:
                    max_value_norm = norm
                    max_value = value
        return max_value

def do_average(row, args, multiple=True):
    """Calculate the average (mean) of the arguments
    Ignores any cell that does not contain a number.
    @param row: the HXL data row
    @param args: the function arguments (name removed from start)
    @returns: the mean of all numeric arguments, or empty string if none found
    """
    values = _deref(row, args, multiple)

    total = 0
    count = 0

    # look for numbers
    for value in values:
        try:
            total += hxl.datatypes.normalise_number(value)
            count += 1
        except (TypeError, ValueError):
            # not a number (narrowed from a bare "except:")
            pass

    # if there were no numbers, don't return a result
    if count > 0:
        return total / count
    else:
        return ''
def do_round(row, args, multiple=False):
    """Round a single value to the nearest integer.
    @param row: the HXL data row
    @param args: the function argument (name removed from start)
    @returns: the first argument, rounded if it's a number, or unchanged otherwise
    """
    values = _deref(row, args, False)
    if not values:
        # BUG FIX: the original indexed values[0] unconditionally, and the
        # logging call inside its bare "except:" re-raised IndexError for an
        # empty argument list; return '' like the other error paths instead.
        logup('No argument to round()', level='warning')
        logger.warning("No argument to round()")
        return ''
    if len(values) > 1:
        logup('Ignoring extra arguments to round()', {"args": str(values[1:])}, level='warning')
        logger.warning("Ignoring extra arguments to round(): %s", str(values[1:]))
    try:
        return round(values[0])
    except (TypeError, ValueError):
        # narrowed from a bare "except:"; round() raises TypeError for
        # non-numeric input
        logup('Trying to round non-numeric value', {"value": str(values[0])}, level='warning')
        logger.warning("Trying to round non-numeric value %s", values[0])
        return values[0]

def do_join(row, args, multiple=True):
    """Join values with the separator provided.
    Also joins empty values (for consistency)
    USAGE: join(sep, value1[, ...])
    @param row: the HXL data row
    @param args: the function arguments (name removed from start)
    @returns: all of the arguments, joined together
    """
    values = _deref(row, args, multiple)
    separator = values[0]
    return separator.join(values[1:])


def do_today(row, args, multiple=False):
    """Return the current date (UTC) in ISO format YYYY-mm-dd
    @param row: the HXL data row
    @param args: the function arguments (name removed from start)
    @returns: the current UTC date in ISO YYYY-mm-dd format
    """
    return datetime.datetime.utcnow().strftime('%Y-%m-%d')


def do_datedif(row, args, multiple=False):
    """Calculate the difference between the first date and the second.
    The optional internal units arg determines the unit of measurement.
    USAGE: datedif(date1, date2[, unit])
    @param row: the HXL data row
    @param args: the function arguments (name removed from start)
    @returns: the difference between the dates as an integer.
    """
    values = _deref(row, args, multiple)
    if len(values) == 2:
        unit = 'D'      # default unit: days
    elif len(values) == 3:
        unit = str(values[2]).upper()
    else:
        logup('Wrong number of arguments to datedif()', level='error')
        logger.error("Wrong number of arguments to datedif()")
        return ''
    try:
        date1 = datetime.datetime.strptime(hxl.datatypes.normalise_date(values[0]), '%Y-%m-%d')
    except (TypeError, ValueError, OverflowError):
        # narrowed from a bare "except:"
        logup("Can't parse date", {"date": str(values[0])}, level='error')
        logger.error("Can't parse date: %s", values[0])
        return ''
    try:
        date2 = datetime.datetime.strptime(hxl.datatypes.normalise_date(values[1]), '%Y-%m-%d')
    except (TypeError, ValueError, OverflowError):
        # narrowed from a bare "except:"
        logup("Can't parse date", {"date": str(values[1])}, level='error')
        logger.error("Can't parse date: %s", values[1])
        return ''
    diff = date2-date1
    if unit == 'Y':
        return int(abs(diff.days/365))
    elif unit == 'M':
        return abs(int(round(diff.days/30)))
    elif unit == 'W':
        return abs(int(round(diff.days/7)))
    elif unit == 'D':
        return abs(diff.days)
    else:
        # BUG FIX: the original messages said "datediff()", but the function
        # is registered and invoked as "datedif"
        logup('Unrecognised unit for datedif()', {"unit": str(unit)}, level='error')
        logger.error("Unrecognised unit %s for datedif()", str(unit))
        return ''


def do_toupper(row, args, multiple=False):
    """Convert the value to a string in upper case
    USAGE: toupper(value)
    @param row: the HXL data row
    @param args: the function arguments (name removed from start)
    @returns: the value as an upper-case string
    """
    values = _deref(row, args, multiple)
    return str(values[0]).upper()
def do_tolower(row, args, multiple=False):
    """Convert the value to a string in lower case
    USAGE: tolower(value)
    @param row: the HXL data row
    @param args: the function arguments (name removed from start)
    @returns: the value as a lower-case string
    """
    resolved = _deref(row, args, multiple)
    return str(resolved[0]).lower()


FUNCTIONS = {
    'sum': lambda row, args, multiple: add(row, args, multiple),
    'product': lambda row, args, multiple: multiply(row, args, multiple),
    'min': do_min,
    'max': do_max,
    'average': do_average,
    'round': do_round,
    'join': do_join,
    'today': do_today,
    'datedif': do_datedif,
    'toupper': do_toupper,
    'tolower': do_tolower,
}
"""Master table of user-callable functions"""


#
# Internal helpers
#

def _deref(row, args, multiple=False):
    """Dereference a term.
    If it's a two-element list with a function and a list, recurse.
    If it's a tag pattern, look it up in the row and replace with value(s)
    If it's already a literal (number or string), leave it alone.
    @param row: a hxl.model.Row object
    @param args: a list of arguments to dereference (may be tag patterns or literals)
    @param multiple: if true, return all matches for a tag pattern
    @return: always a list (may be empty)
    """
    values = []

    for term in args:
        if isinstance(term, collections.abc.Sequence) and callable(term[0]):
            # a (function, args) pair: recurse
            op, op_args = term[0], term[1]
            if op == tagref:
                values.extend(_deref(row, op_args, multiple))
            else:
                values.append(op(row, op_args))
        elif isinstance(term, hxl.model.TagPattern):
            # a tag pattern: look up matching values in the row
            if multiple:
                values.extend(row.get_all(term))
            else:
                values.append(row.get(term))
        else:
            # a literal: pass through untouched
            values.append(term)

    return values
373 | """ 374 | if not arg: 375 | return 0 376 | try: 377 | return hxl.datatypes.normalise_number(arg) 378 | except (ValueError, TypeError): 379 | logup('Cannot convert to a number for calculated field', {"arg": arg}, level='debug') 380 | logger.debug("Cannot convert %s to a number for calculated field", arg) 381 | return 0 382 | -------------------------------------------------------------------------------- /hxl/formulas/lexer.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | import ply.lex as lex, json 4 | 5 | from hxl.util import logup 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | tokens = ( 10 | 'NAME', 11 | 'TAGPATTERN', 12 | 'INT', 13 | 'FLOAT', 14 | 'STRING', 15 | 'PLUS', 16 | 'MINUS', 17 | 'TIMES', 18 | 'DIVIDE', 19 | 'MODULO', 20 | 'LPAREN', 21 | 'RPAREN', 22 | 'COMMA' 23 | ) 24 | 25 | t_ignore = " \t\r\n" 26 | 27 | # Regular expression rules for simple tokens 28 | t_NAME = r'[A-Za-z][A-Za-z0-9_]*' 29 | t_TAGPATTERN = r'\#[A-Za-z][A-Za-z0-9_]*(\s*[+-][A-Za-z][A-Za-z0-9_]*)*[!]?' 
def t_STRING(t):
    # Bug fix: the previous pattern r'"([^"]|\\.)*"' allowed [^"] to consume
    # a lone backslash, so an escaped quote inside the string ("a\"b")
    # terminated the token early and json.loads() then raised on the
    # truncated fragment.  This form only consumes a backslash as part of a
    # complete escape pair, so escaped quotes stay inside the token.
    r'"(\\.|[^"\\])*"'
    # Reuse the JSON string grammar to decode escapes (\n, \", \uXXXX, ...).
    t.value = json.loads(t.value)
    return t
def p_args_multiple(p):
    'args : expression COMMA args'
    # Prepend the newly-parsed expression to the already-built argument list.
    p[0] = [p[1], *p[3]]
# Regular expression fragments for the degree/minute/second components.
# Restored the named capture groups (?P<deg> / (?P<min> / (?P<sec> that were
# stripped in transit: _make_degrees_digital() reads parts['deg'],
# parts['min'], parts['sec'], parts.get('sign') and parts.get('hemi'), and
# the bare "(?P" form is not even a valid regular expression.
_DEG_RE = r'(?P<deg>\d+(?:\.\d*)?)\s*\°?'
_MIN_RE = r'(?P<min>\d+(?:\.\d*)?)\s*[\'`′]?'
_SEC_RE = r'(?P<sec>\d+(?:\.\d*)?)\s*(?:["“”″]|[\'`′][\'`′])?'

LAT_PATTERNS = (
    re.compile(
        r'^(?P<sign>[+-])?\s*{}(?:[\s:;,-]*{}(?:[\s:;,-]*{})?)?$'.format(
            _DEG_RE, _MIN_RE, _SEC_RE
        ), flags=re.I
    ), # -00 00 00
    re.compile(
        r'^(?P<hemi>[NS])\s*{}(?:[\s:;,-]*{}(?:[\s:;,-]*{})?)?$'.format(
            _DEG_RE, _MIN_RE, _SEC_RE
        ), flags=re.I
    ), # S 00 00 00
    re.compile(
        r'^{}\s*(?P<hemi>[NS])\s*(?:[\s:;,-]*{}(?:[\s:;,-]*{})?)?$'.format(
            _DEG_RE, _MIN_RE, _SEC_RE
        ), flags=re.I
    ), # 00 S 00 00
    re.compile(
        r'^{}(?:[\s:;,-]*{}(?:[\s:;,-]*{})?)\s*(?P<hemi>[NS])?$'.format(
            _DEG_RE, _MIN_RE, _SEC_RE
        ), flags=re.I
    ), # 00 00 00 S
)
"""List of regular expressions for parsing latitude strings"""


LON_PATTERNS = (
    re.compile(
        r'^(?P<sign>[+-])?\s*{}(?:[\s:;,-]*{}(?:[\s:;,-]*{})?)?$'.format(
            _DEG_RE, _MIN_RE, _SEC_RE
        ), flags=re.I
    ), # -00 00 00
    re.compile(
        r'^(?P<hemi>[EW])\s*{}(?:[\s:;,-]*{}(?:[\s:;,-]*{})?)?$'.format(
            _DEG_RE, _MIN_RE, _SEC_RE
        ), flags=re.I
    ), # W 00 00 00
    re.compile(
        r'^{}\s*(?P<hemi>[EW])\s*(?:[\s:;,-]*{}(?:[\s:;,-]*{})?)?$'.format(
            _DEG_RE, _MIN_RE, _SEC_RE
        ), flags=re.I
    ), # 00 W 00 00
    re.compile(
        r'^{}(?:[\s:;,-]*{}(?:[\s:;,-]*{})?)\s*(?P<hemi>[EW])?$'.format(
            _DEG_RE, _MIN_RE, _SEC_RE
        ), flags=re.I
    ), # 00 00 00 W
)
"""List of regular expressions for parsing longitude strings"""
90 | """ 91 | num = float(parts['deg']) 92 | if num > max_deg or num < max_deg*-1: 93 | raise ValueError('degrees out of range {}/{}'.format(max_deg*-1, max_deg)) 94 | if parts['min']: 95 | min = float(parts['min']) 96 | if min >= 60.0: 97 | raise ValueError('minutes must be less than 60') 98 | else: 99 | num += min/60.0 100 | if parts['sec']: 101 | sec = float(parts['sec']) 102 | if sec >= 60: 103 | raise ValueError('seconds must be less than 60') 104 | num += sec/3600.0 105 | if parts.get('sign') == '-' or (parts.get('hemi') and parts['hemi'].upper() in ('S', 'W')): 106 | num *= -1 107 | return num 108 | 109 | 110 | def parse_lat(value): 111 | """Parse a latitude string 112 | 113 | Accepts a wide range of formats, as defined in LAT_PATTERNS 114 | 115 | Examples: 116 | ``` 117 | lat = parse_lat("45.5000000") # => 45.5 118 | lat = parse_lat("45:30N") # => 45.5 119 | ``` 120 | 121 | Args: 122 | value (str): the input string to parse 123 | 124 | Returns: 125 | float: decimal degrees latitude, or None if it can't be parsed. 126 | 127 | Raises: 128 | ValueError: if the input is out of allowed range 129 | 130 | """ 131 | value = hxl.datatypes.normalise_space(value) 132 | for pattern in LAT_PATTERNS: 133 | result = re.match(pattern, value) 134 | if result: 135 | try: 136 | lat = _make_degrees_digital(result.groupdict(), max_deg=90) 137 | except ValueError as e: 138 | raise ValueError('failed to parse latitude {}: {}'.format(value, e.args[0])) 139 | return lat 140 | return None 141 | 142 | 143 | def parse_lon(value): 144 | """Parse a longitude string 145 | 146 | Accepts a wide range of formats, as defined in LON_PATTERNS 147 | 148 | Examples: 149 | ``` 150 | lon = parse_lon("-75.5000000") # => -75.5 151 | lon = parse_lon("75:30W") # => -75.5 152 | ``` 153 | 154 | Args: 155 | value (str): the input string to parse 156 | 157 | Returns: 158 | float: decimal degrees longitude, or None if it can't be parsed. 
def parse_coord(value):
    """Parse lat/lon separated by a delimiter [/,:; ]

    Examples:
    ```
    coord = parse_coord("45.500000;-75.5000000") # => (45.5, -75.5,)
    coord = parse_coord("45:30N, 75:30W") # => (45.5, -75.5,)
    ```

    Args:
        value (str): the lat/lon coordinate string to parse

    Returns:
        tuple: the latitude and longitude as float values, or None if unparseable

    Raises:
        ValueError: if either of the coordinates is out of range
    """
    for delim in ('/', ',', ':', ';', ' ',):
        if value.find(delim) > 0:
            parts = value.split(delim)
            if len(parts) == 2:
                lat = parse_lat(parts[0])
                lon = parse_lon(parts[1])
                # Bug fix: the original tested "if lat and lon", which
                # rejected valid 0.0 coordinates (equator / prime meridian)
                # because 0.0 is falsy.
                if lat is not None and lon is not None:
                    return (lat, lon,)
    return None
"#population,#affected,#inneed,#targeted,#reached,#indicator+num,#output+num,#value", 17 | "#valid_severity": "info", 18 | "#valid_value+outliers": "true" 19 | }, 20 | { 21 | "#meta+comment": "Spell-check highly-repetitive values", 22 | "#valid_tag": "#org,#sector,#subsector,#status,#beneficiary,#indicator+name,#access,#frequency,#item,#operations,#service,#channel,#modality,#currency,#impact,#group,#severity", 23 | "#valid_severity": "warning", 24 | "#valid_value+case": "true", 25 | "#valid_value+spelling": "true", 26 | "#description": "Rare value: check that it is correct" 27 | }, 28 | { 29 | "#meta+comment": "Placenames may benefit from spell checking", 30 | "#valid_tag": "#region,#country,#adm1,#adm2,#adm3,#adm4,#adm5,#loc", 31 | "#valid_severity": "info", 32 | "#valid_value+case": "true", 33 | "#valid_value+spelling": "true", 34 | "#description": "Rare value: check that it is correct" 35 | }, 36 | { 37 | "#meta+comment": "Look for consistent datatypes with indicators", 38 | "#valid_tag": "#indicator", 39 | "#valid_severity": "warning", 40 | "#valid_datatype+consistent": "true", 41 | "#description": "Possibly inconsistent datatype for this indicator" 42 | }, 43 | { 44 | "#meta+comment": "Detect sloppy whitespace anywhere", 45 | "#valid_tag": "#*", 46 | "#valid_severity": "warning", 47 | "#valid_value+whitespace": "true", 48 | "#description": "Extra whitespace can affect reporting and visualisation" 49 | } 50 | ] 51 | -------------------------------------------------------------------------------- /hxl/util.py: -------------------------------------------------------------------------------- 1 | """ Other misc utilities 2 | """ 3 | 4 | import logging 5 | import os 6 | import sys 7 | import structlog 8 | 9 | def logup(msg, props={}, level="notset"): 10 | """ 11 | Adds the function name on the fly for the log 12 | 13 | Args: 14 | msg: the actual log message 15 | props: additional properties for the log 16 | 17 | """ 18 | levels = { 19 | "critical": 50, 20 | 
"error": 40, 21 | "warning": 30, 22 | "info": 20, 23 | "debug": 10 24 | } 25 | if level == 'notset': 26 | level = 'info' 27 | if levels[level] >= levels[os.getenv('LOGGING_LEVEL', 'INFO').lower()]: 28 | input_logger = structlog.wrap_logger(logging.getLogger('hxl.REMOTE_ACCESS')) 29 | props['function'] = sys._getframe(1).f_code.co_name 30 | input_logger.log(level=levels[level], event=msg, **props) 31 | -------------------------------------------------------------------------------- /profile/validation-profile.py: -------------------------------------------------------------------------------- 1 | import hxl 2 | 3 | DATA_FILE='data/unhcr_popstats_export_persons_of_concern_all_data.hxl' 4 | 5 | hxl.validate(hxl.io.make_input(DATA_FILE, allow_local=True)) 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jsonpath-ng 2 | ply 3 | python-dateutil 4 | python-io-wrapper>=0.2 5 | requests 6 | structlog 7 | unidecode 8 | urllib3 9 | wheel 10 | xlrd3>=1.1.0 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from setuptools import setup 4 | import sys 5 | 6 | if sys.version_info < (3,): 7 | raise RuntimeError("libhxl requires Python 3 or higher") 8 | 9 | with open("README.md", "r") as fh: 10 | long_description = fh.read() 11 | 12 | setup( 13 | name='libhxl', 14 | version="5.2.2", 15 | description='Python support library for the Humanitarian Exchange Language (HXL). 
See http://hxlstandard.org and https://github.com/HXLStandard/libhxl-python', 16 | long_description=long_description, 17 | long_description_content_type="text/markdown", 18 | project_urls={ 19 | 'Documentation': 'https://hxlstandard.github.io/libhxl-python/index.html', 20 | 'GitHub': 'https://github.com/HXLStandard/libhxl-python/', 21 | 'Changelog': 'https://github.com/HXLStandard/libhxl-python/blob/prod/CHANGELOG', 22 | }, 23 | author='David Megginson', 24 | author_email='megginson@un.org', 25 | url='http://hxlproject.org', 26 | install_requires=[ 27 | 'jsonpath_ng', 28 | 'ply', 29 | 'python-dateutil', 30 | 'python-io-wrapper>=0.2', 31 | 'requests', 32 | 'structlog', 33 | 'unidecode', 34 | 'urllib3', 35 | 'wheel', 36 | 'xlrd3>=1.1.0', 37 | ], 38 | packages=['hxl', 'hxl.formulas'], 39 | package_data={'hxl': ['*.json']}, 40 | include_package_data=True, 41 | test_suite='tests', 42 | tests_require = [ 43 | 'pytest', 44 | 'mock' 45 | ], 46 | entry_points={ 47 | 'console_scripts': [ 48 | 'hxladd = hxl.scripts:hxladd', 49 | 'hxlappend = hxl.scripts:hxlappend', 50 | 'hxlclean = hxl.scripts:hxlclean', 51 | 'hxlcount = hxl.scripts:hxlcount', 52 | 'hxlcut = hxl.scripts:hxlcut', 53 | 'hxldedup = hxl.scripts:hxldedup', 54 | 'hxlexpand = hxl.scripts:hxlexpand', 55 | 'hxlexplode = hxl.scripts:hxlexplode', 56 | 'hxlfill = hxl.scripts:hxlfill', 57 | 'hxlimplode = hxl.scripts:hxlimplode', 58 | 'hxlhash = hxl.scripts:hxlhash', 59 | 'hxlinfo = hxl.scripts:hxlinfo', 60 | 'hxlmerge = hxl.scripts:hxlmerge', 61 | 'hxlrename = hxl.scripts:hxlrename', 62 | 'hxlreplace = hxl.scripts:hxlreplace', 63 | 'hxlselect = hxl.scripts:hxlselect', 64 | 'hxlsort = hxl.scripts:hxlsort', 65 | 'hxlspec = hxl.scripts:hxlspec', 66 | 'hxltag = hxl.scripts:hxltag', 67 | 'hxlvalidate = hxl.scripts:hxlvalidate' 68 | ] 69 | } 70 | ) 71 | -------------------------------------------------------------------------------- /tests/__init__.py: 
def have_connectivity(host="8.8.8.8", port=53, timeout=3):
    """ Attempt to make a DNS connection to see if we're on the Internet.
    From https://stackoverflow.com/questions/3764291/checking-network-connection
    @param host: the host IP to connect to (default 8.8.8.8, google-public-dns-a.google.com)
    @param port: the port to connect to (default 53, TCP)
    @param timeout: seconds before timeout (default 3)
    @returns: True if connected; False otherwise.
    """
    try:
        # Bug fixes: the original set socket.setdefaulttimeout() (a
        # process-wide side effect on all later sockets) and never closed
        # the connected socket (a file-descriptor leak).  create_connection
        # scopes the timeout to this call, and the context manager closes
        # the socket deterministically.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except Exception:
        # broad catch kept from the original: any failure means "offline"
        return False
49 | URL_MOCK_TARGET = 'hxl.input.open_url_or_file' 50 | 51 | # Mock object to replace hxl.input.make_stream 52 | URL_MOCK_OBJECT = unittest.mock.Mock() 53 | URL_MOCK_OBJECT.side_effect = mock_open_url 54 | -------------------------------------------------------------------------------- /tests/files/mock/append-source-1.csv: -------------------------------------------------------------------------------- 1 | Org, Targeted, Sector 1, Sector 2 2 | #org, #targeted, #sector+list, #sector+list 3 | NGO A, 200, WASH, 4 | NGO C, 500, Health, Food 5 | -------------------------------------------------------------------------------- /tests/files/mock/append-source-2.csv: -------------------------------------------------------------------------------- 1 | Org, Targeted, Sector 1, Sector 2 2 | #org, #targeted, #sector+list, #sector+list 3 | NGO A, 300, Education, 4 | NGO C, 100, Protection, Food 5 | -------------------------------------------------------------------------------- /tests/files/mock/append-source-list.csv: -------------------------------------------------------------------------------- 1 | Datasets to append 2 | #x_source 3 | http://example.org/test_filters/append-source-1.csv 4 | http://example.org/test_filters/append-source-2.csv 5 | -------------------------------------------------------------------------------- /tests/files/mock/taxonomy.csv: -------------------------------------------------------------------------------- 1 | #adm1+name, #adm1+code 2 | Coast, C001 3 | Plains, C002 4 | Mountains, C003 5 | -------------------------------------------------------------------------------- /tests/files/test_converters/wide-tagging-test.csv: -------------------------------------------------------------------------------- 1 | 
cod_wardsr,dispvshost,sect_hhcompsexrep,hoh,sexhoh,marital_stat,grp_fcs1_cernbr_fcs1_cer,src_fcs1_cer,grp_fcs2_legnbr_fcs2_leg,src_fcs2_leg,grp_fcs3_milknbr_fcs3_milk,src_fcs3_milk,grp_fcs4_meatnbr_fcs4_meat,src_fcs4_meat,grp_fcs41_freshnbr_fcs41_fresh,src_fcs41_fresh,grp_fcs42_organnbr_fcs42_organ,src_fcs42_organ,grp_fcs43_fishnbr_fcs43_fish,src_fcs43_fish,grp_fcs44_egnbr_fcs44_eg,src_fcs44_eg,grp_fcs5_vegnbr_fcs5_veg,src_fcs5_veg,grp_fcs51_orvegnbr_fcs51_orveg,src_fcs51_orveg,grp_fcs52_grvegnbr_fcs52_grveg,src_fcs52_grveg,grp_fcs6_fruitnbr_fcs6_fruit,src_fcs6_fruit,grp_fcs61_orfrnbr_fcs61_orfr,src_fcs61_orfr,grp_fcs7_oilnbr_fcs7_oil,src_fcs7_oil,grp_fcs8_sugnbr_fcs8_sug,src_fcs8_sug,grp_fcs9_condnbr_fcs9_cond,src_fcs9_cond,grp_csircsi1,grp_csircsi2,grp_csircsi3,grp_csircsi4,grp_csircsi5,grp_lhcsilhcs1s,grp_lhcsilhcs2s,grp_lhcsilhcs3s,grp_lhcsilhcs4s,grp_lhcsilhcs5c,grp_lhcsilhcs6c,grp_lhcsilhcs7c,grp_lhcsilhcs8e,grp_lhcsilhcs9e,grp_lhcsilhcs10e,grp_washwdrink,grp_wash2src_engy,typ_toilet,typ_dwel,typ_wall,typ_roof,hh_house,grp_houseasset_house1,grp_houseasset_house2,grp_houseasset_house3,grp_houseasset_house4,grp_houseasset_house5,grp_houseasset_house6,grp_houseasset_house7,grp_houseasset_house8,grp_houseasset_house9,grp_houseasset_house10,grp_houseasset_house11,grp_houseasset_house12,grp_houseasset_house13,grp_houseasset_house14,grp_houseasset_house15,grp_houseasset_house16,grp_inc_lvhdnbr_hh_inc,grp_inc_lvhdincom_act1,incom_act1_contr,incom_act1_pers,nt_inc2incom_act2,incom_act2_contr,incom_act2_pers,grp_inc_lvhdbincom_act1b,incom_act1_contrb,incom_act1_persb,nt_inc2bincom_act2b,incom_act2_contrb,incom_act2_persb,grp_expend_debtsyn_expend_fcs1_cer_cash,expend_fcs1_cer_cash,yn_expend_fcs1_cer_ncash,expend_fcs1_cer_ncash,grp_expend_fcs2_tubyn_expend_fcs2_tub_cash,expend_fcs2_tub_cash,yn_expend_fcs2_tub_ncash,expend_fcs2_tub_ncash,grp_expend_fcs3_pulyn_expend_fcs3_pul_cash,expend_fcs3_pul_cash,yn_expend_fcs3_pul_ncash,expend_fcs3_pul_ncash,grp_expend_fcs4_fruyn_exp
end_fcs4_fru_cash,expend_fcs4_fru_cash,yn_expend_fcs4_fru_ncash,expend_fcs4_fru_ncash,grp_expend_fcs5_fishyn_expend_fcs5_fish_cash,expend_fcs5_fish_cash,yn_expend_fcs5_fish_ncash,expend_fcs5_fish_ncash,grp_expend_fcs6_oilyn_expend_fcs6_oil_cash,expend_fcs6_oil_cash,yn_expend_fcs6_oil_ncash,expend_fcs6_oil_ncash,grp_expend_fcs7_milkyn_expend_fcs7_milk_cash,expend_fcs7_milk_cash,yn_expend_fcs7_milk_ncash,expend_fcs7_milk_ncash,grp_expend_fcs8_sugyn_expend_fcs8_sug_cash,expend_fcs8_sug_cash,yn_expend_fcs8_sug_ncash,expend_fcs8_sug_ncash,grp_expend_fcs9_teayn_expend_fcs9_tea_cash,expend_fcs9_tea_cash,yn_expend_fcs9_tea_ncash,expend_fcs9_tea_ncash,yn_expend_nonfood_alc,expend_nonfood_alc,yn_expend_nonfood_soap,expend_nonfood_soap,yn_expend_nonfood_trans,expend_nonfood_trans,yn_expend_nonfood_fuel,expend_nonfood_fuel,yn_expend_nonfood_wat,expend_nonfood_wat,yn_expend_nonfood_elec,expend_nonfood_elec,yn_expend_nonfood_com,expend_nonfood_com,yn_expend_nonfood_rent,expend_nonfood_rent,yn_expend_nonfood180_med,expend_nonfood180_med,yn_expend_nonfood180_clo,expend_nonfood180_clo,yn_expend_nonfood180_edu,expend_nonfood180_edu,yn_expend_nonfood180_deb,expend_nonfood180_deb,yn_expend_nonfood180_cel,expend_nonfood180_cel,yn_expend_nonfood180_agr,expend_nonfood180_agr,yn_expend_nonfood180_sav,expend_nonfood180_sav,yn_expend_nonfood180_con,expend_nonfood180_con,yn_expend_nonfood180_oth,expend_nonfood180_oth,grp_chockyn_chocks,chocks1,chocks2,chocks3,grp_hhsyn_hhs1,hss1,yn_hhs2,hss2,yn_hhs3,hss3,rCSI,starches,pulses,meat,meat1,veg1,veg2,fruits1,fruits2,diary,oil,sugar,FCS,FCG.21.35,food_monthly,nonfood_monthly,total_expenditure,FoodExp_share,Foodexp_4pt,Max_coping_behaviour,FS_final,HHSscore,HHSscore_CAT 2 | XXX,IDPs, Male,1, Male,2,2,1,0, ,2,5,2,5,0, ,0, ,3,5,0, ,0, ,0, ,5,5,0, ,0, ,0, ,0, ,0, 
,2,3,3,1,3,4,4,4,4,4,4,4,4,4,4,1,1,4,3,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,15,60,1,1,40,1,1,95,1,13,5,1,1,5000,1,2000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2000,0,0,0,0,0,0,0,0,0,0,1,1500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,5,9,10,1,2,1,2,1,2,21,2,0,3,2,0,5,0,0,2,0,0,29,2,9000,1500,10500,85.71428571,4,1,3,3,2 3 | -------------------------------------------------------------------------------- /tests/files/test_io/input-broken.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-broken.xlsx -------------------------------------------------------------------------------- /tests/files/test_io/input-fuzzy.csv: -------------------------------------------------------------------------------- 1 | Qué?,,,Quién?,Para quién?,,Dónde?,Cuándo? 2 | Registro,Sector/Cluster,Subsector,Organización,Hombres,Mujeres,País,Departamento/Provincia/Estado 3 | ,#sector+es,#subsector+es,#org+es,#targeted+f,#targeted+f,country,adm1 4 | 001,WASH,Higiene,ACNUR,100,100,Panamá,Los Santos,1 March 2015 5 | 002,Salud,Vacunación,OMS,,,Colombia,Cauca, 6 | 003,Educación,Formación de enseñadores,UNICEF,250,300,Colombia,Chocó, 7 | 004,WASH,Urbano,OMS,80,95,Venezuela,Amazonas, 8 | -------------------------------------------------------------------------------- /tests/files/test_io/input-invalid.csv: -------------------------------------------------------------------------------- 1 | Qué?,,,Quién?,Para quién?,,Dónde?,Cuándo? 
2 | Registro,Sector/Cluster,Subsector,Organización,Hombres,Mujeres,País,Departamento/Provincia/Estado 3 | ,sector+es,subsector+es,org+es,targeted+f,targeted+m,country,adm1,date+reported 4 | 001,WASH,Higiene,ACNUR,100,100,Panamá,Los Santos,1 March 2015 5 | 002,Salud,Vacunación,OMS,,,Colombia,Cauca, 6 | 003,Educación,Formación de enseñadores,UNICEF,250,300,Colombia,Chocó, 7 | 004,WASH,Urbano,OMS,80,95,Venezuela,Amazonas, 8 | -------------------------------------------------------------------------------- /tests/files/test_io/input-invalid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-invalid.png -------------------------------------------------------------------------------- /tests/files/test_io/input-merged.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-merged.xlsx -------------------------------------------------------------------------------- /tests/files/test_io/input-multiline.csv: -------------------------------------------------------------------------------- 1 | #description 2 | "Line 1 3 | Line 2 4 | Line 3" 5 | -------------------------------------------------------------------------------- /tests/files/test_io/input-multiple-headers.csv: -------------------------------------------------------------------------------- 1 | Junk,Junk,Junk,Junk 2 | Org,No hashtag,Province,Number affected 3 | #org,,#adm1,#affected 4 | Org X,xxx,Coast, 100 5 | Org Y,yyy,Mountains, 200 6 | -------------------------------------------------------------------------------- /tests/files/test_io/input-notag1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 
-------------------------------------------------------------------------------- /tests/files/test_io/input-notag2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /tests/files/test_io/input-quality.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-quality.xls -------------------------------------------------------------------------------- /tests/files/test_io/input-quality.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-quality.xlsx -------------------------------------------------------------------------------- /tests/files/test_io/input-untagged-csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-untagged-csv.zip -------------------------------------------------------------------------------- /tests/files/test_io/input-untagged-objects.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Registro": "001", 4 | "Sector/Cluster": "WASH", 5 | "Subsector": "Higiene", 6 | "Organización": "ACNUR", 7 | "Hombres": "100", 8 | "Mujeres": "100", 9 | "País": "Panamá", 10 | "Departamento/Provincia/Estado": "Los Santos" 11 | }, 12 | { 13 | "Registro": "002", 14 | "Sector/Cluster": "Salud", 15 | "Subsector": "Vacunación", 16 | "Organización": "OMS", 17 | "Hombres": "", 18 | "Mujeres": "", 19 | "País": "Colombia", 20 | "Departamento/Provincia/Estado": "Cauca" 21 | }, 22 | { 23 | "Registro": "003", 24 | 
"Sector/Cluster": "Educación", 25 | "Subsector": "Formación de enseñadores", 26 | "Organización": "UNICEF", 27 | "Hombres": "250", 28 | "Mujeres": "300", 29 | "País": "Colombia", 30 | "Departamento/Provincia/Estado": "Chocó" 31 | }, 32 | { 33 | "Registro": "004", 34 | "Sector/Cluster": "WASH", 35 | "Subsector": "Urbano", 36 | "Organización": "OMS", 37 | "Hombres": "80", 38 | "Mujeres": "95", 39 | "País": "Venezuela", 40 | "Departamento/Provincia/Estado": "Amazonas" 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /tests/files/test_io/input-untagged.csv: -------------------------------------------------------------------------------- 1 | ,¿Qué?,,¿Quién?,¿Para quién?,,¿Dónde?,,¿Cuándo? 2 | Registro,Sector/Cluster,Subsector,Organización,Hombres,Mujeres,País,Departamento/Provincia/Estado 3 | 001,WASH,Higiene,ACNUR,100,100,Panamá,Los Santos,1 March 2015 4 | 002,Salud,Vacunación,OMS,,,Colombia,Cauca, 5 | 003,Educación,Formación de enseñadores,UNICEF,250,300,Colombia,Chocó, 6 | 004,WASH,Urbano,OMS,80,95,Venezuela,Amazonas, 7 | -------------------------------------------------------------------------------- /tests/files/test_io/input-untagged.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["","¿Qué?", "", "¿Quién?", "¿Para quién?", "", "¿Dónde?", "", "¿Cuándo?"], 3 | ["Registro", "Sector/Cluster", "Subsector", "Organización", "Hombres", "Mujeres", "País", "Departamento/Provincia/Estado"], 4 | ["001", "WASH", "Higiene", "ACNUR", "100", "100", "Panamá", "Los Santos", "1 March 2015"], 5 | ["002", "Salud", "Vacunación", "OMS", "", "", "Colombia", "Cauca", ""], 6 | ["003", "Educación", "Formación de enseñadores", "UNICEF", "250", "300", "Colombia", "Chocó", ""], 7 | ["004", "WASH", "Urbano", "OMS", "80", "95", "Venezuela", "Amazonas", ""] 8 | ] 9 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid-csv.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-valid-csv.zip -------------------------------------------------------------------------------- /tests/files/test_io/input-valid-json-selector.json: -------------------------------------------------------------------------------- 1 | { 2 | "sel1": [ 3 | ["#adm1", "#affected"], 4 | ["Coast", "100"] 5 | ], 6 | "sel2": [ 7 | ["#adm1", "#affected"], 8 | ["Plains", "200"] 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid-json.txt: -------------------------------------------------------------------------------- 1 | [ 2 | ["Qué?", "", "", "Quién?", "Para quién?", "", "Dónde?", "Cuándo?"], 3 | ["Registro", "Sector/Cluster", "Subsector", "Organización", "Hombres", "Mujeres", "País", "Departamento/Provincia/Estado"], 4 | ["", "#sector+es", "#subsector+es", "#org+es", "#targeted+f", "#targeted+m", "#country", "#adm1", "#date+reported"], 5 | ["001", "WASH", "Higiene", "ACNUR", "100", "100", "Panamá", "Los Santos", "1 March 2015"], 6 | ["002", "Salud", "Vacunación", "OMS", "", "", "Colombia", "Cauca", ""], 7 | ["003", "Educación", "Formación de enseñadores", "UNICEF", "250", "300", "Colombia", "Chocó", ""], 8 | ["004", "WASH", "Urbano", "OMS", "80", "95", "Venezuela", "Amazonas", ""] 9 | ] 10 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid-latin1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-valid-latin1.csv -------------------------------------------------------------------------------- /tests/files/test_io/input-valid-nested.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "status":"OK", 3 | "date":"2017-06-01", 4 | "results": [ 5 | { 6 | "": "001", 7 | "#sector+es": "WASH", 8 | "#subsector+es": "Higiene", 9 | "#org+es": "ACNUR", 10 | "#targeted+f": "100", 11 | "#targeted+m": "100", 12 | "#country": "Panamá", 13 | "#adm1": "Los Santos", 14 | "#date+reported": "1 March 2015" 15 | }, 16 | { 17 | "": "002", 18 | "#sector+es": "Salud", 19 | "#subsector+es": "Vacunación", 20 | "#org+es": "OMS", 21 | "#country": "Colombia", 22 | "#adm1": "Cauca" 23 | }, 24 | { 25 | "": "003", 26 | "#sector+es": "Educación", 27 | "#subsector+es": "Formación de enseñadores", 28 | "#org+es": "UNICEF", 29 | "#targeted+f": "250", 30 | "#targeted+m": "300", 31 | "#country": "Colombia", 32 | "#adm1": "Chocó" 33 | }, 34 | { 35 | "": "004", 36 | "#sector+es": "WASH", 37 | "#subsector+es": "Urbano", 38 | "#org+es": "OMS", 39 | "#targeted+f": "80", 40 | "#targeted+m": "95", 41 | "#country": "Venezuela", 42 | "#adm1": 43 | "Amazonas" 44 | } 45 | ] 46 | } 47 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid-objects.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "": "001", 4 | "#sector+es": "WASH", 5 | "#subsector+es": "Higiene", 6 | "#org+es": "ACNUR", 7 | "#targeted+f": "100", 8 | "#targeted+m": "100", 9 | "#country": "Panamá", 10 | "#adm1": "Los Santos", 11 | "#date+reported": "1 March 2015" 12 | }, 13 | { 14 | "": "002", 15 | "#sector+es": "Salud", 16 | "#subsector+es": "Vacunación", 17 | "#org+es": "OMS", 18 | "#country": "Colombia", 19 | "#adm1": "Cauca" 20 | }, 21 | { 22 | "": "003", 23 | "#sector+es": "Educación", 24 | "#subsector+es": "Formación de enseñadores", 25 | "#org+es": "UNICEF", 26 | "#targeted+f": "250", 27 | "#targeted+m": "300", 28 | "#country": "Colombia", 29 | "#adm1": "Chocó" 30 | }, 31 | { 32 | "": "004", 33 | "#sector+es": "WASH", 34 | 
"#subsector+es": "Urbano", 35 | "#org+es": "OMS", 36 | "#targeted+f": "80", 37 | "#targeted+m": "95", 38 | "#country": "Venezuela", 39 | "#adm1": 40 | "Amazonas" 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid-xlsx.NOEXT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-valid-xlsx.NOEXT -------------------------------------------------------------------------------- /tests/files/test_io/input-valid.csv: -------------------------------------------------------------------------------- 1 | ,Qué?,,¿Quién?,¿Para quién?,,¿Dónde?,,¿Cuándo? 2 | Registro,Sector/Cluster,Subsector,Organización,Hombres,Mujeres,País,Departamento/Provincia/Estado 3 | ,#sector+es,#subsector+es,#org+es,#targeted+f,#targeted+m,#country,#adm1,#date+reported 4 | 001,WASH,Higiene,ACNUR,100,100,Panamá,Los Santos,1 March 2015 5 | 002,Salud,Vacunación,OMS,,,Colombia,Cauca, 6 | 003,Educación,Formación de enseñadores,UNICEF,250,300,Colombia,Chocó, 7 | 004,WASH,Urbano,OMS,80,95,Venezuela,Amazonas, 8 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid.hxl: -------------------------------------------------------------------------------- 1 | ,Qué?,,¿Quién?,¿Para quién?,,¿Dónde?,,¿Cuándo? 
2 | Registro,Sector/Cluster,Subsector,Organización,Hombres,Mujeres,País,Departamento/Provincia/Estado 3 | ,#sector+es,#subsector+es,#org+es,#targeted+f,#targeted+m,#country,#adm1,#date+reported 4 | 001,WASH,Higiene,ACNUR,100,100,Panamá,Los Santos,1 March 2015 5 | 002,Salud,Vacunación,OMS,,,Colombia,Cauca, 6 | 003,Educación,Formación de enseñadores,UNICEF,250,300,Colombia,Chocó, 7 | 004,WASH,Urbano,OMS,80,95,Venezuela,Amazonas, 8 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["Qué?", "", "", "Quién?", "Para quién?", "", "Dónde?", "Cuándo?"], 3 | ["Registro", "Sector/Cluster", "Subsector", "Organización", "Hombres", "Mujeres", "País", "Departamento/Provincia/Estado"], 4 | ["", "#sector+es", "#subsector+es", "#org+es", "#targeted+f", "#targeted+m", "#country", "#adm1", "#date+reported"], 5 | ["001", "WASH", "Higiene", "ACNUR", "100", "100", "Panamá", "Los Santos", "1 March 2015"], 6 | ["002", "Salud", "Vacunación", "OMS", "", "", "Colombia", "Cauca", ""], 7 | ["003", "Educación", "Formación de enseñadores", "UNICEF", "250", "300", "Colombia", "Chocó", ""], 8 | ["004", "WASH", "Urbano", "OMS", "80", "95", "Venezuela", "Amazonas", ""] 9 | ] 10 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid.ssv: -------------------------------------------------------------------------------- 1 | Qué?;;;Quién?;Para quién?;;Dónde?;Cuándo? 
2 | Registro;Sector/Cluster;Subsector;Organización;Hombres;Mujeres;País;Departamento/Provincia/Estado 3 | ;#sector+es;#subsector+es;#org+es;#targeted+f;#targeted+m;#country;#adm1;#date+reported 4 | 001;WASH;Higiene;ACNUR;100;100;Panamá;Los Santos;1 March 2015 5 | 002;Salud;Vacunación;OMS;;;Colombia;Cauca; 6 | 003;Educación;Formación de enseñadores;UNICEF;250;300;Colombia;Chocó; 7 | 004;WASH;Urbano;OMS;80;95;Venezuela;Amazonas; 8 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid.tsv: -------------------------------------------------------------------------------- 1 | Qué? Quién? Para quién? Dónde? Cuándo? 2 | Registro Sector/Cluster Subsector Organización Hombres Mujeres País Departamento/Provincia/Estado 3 | #sector+es #subsector+es #org+es #targeted+f #targeted+m #country #adm1 #date+reported 4 | 001 WASH Higiene ACNUR 100 100 Panamá Los Santos 1 March 2015 5 | 002 Salud Vacunación OMS Colombia Cauca 6 | 003 Educación Formación de enseñadores UNICEF 250 300 Colombia Chocó 7 | 004 WASH Urbano OMS 80 95 Venezuela Amazonas 8 | -------------------------------------------------------------------------------- /tests/files/test_io/input-valid.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-valid.xls -------------------------------------------------------------------------------- /tests/files/test_io/input-valid.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-valid.xlsx -------------------------------------------------------------------------------- /tests/files/test_io/input-zip-invalid.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HXLStandard/libhxl-python/5d18d9f34e269954583e9f1b3ec3e4eb0386a63d/tests/files/test_io/input-zip-invalid.zip -------------------------------------------------------------------------------- /tests/files/test_io/output-valid-objects.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "#adm1": "Los Santos", 4 | "#country": "Panam\u00e1", 5 | "#date+reported": "1 March 2015", 6 | "#org+es": "ACNUR", 7 | "#sector+es": "WASH", 8 | "#subsector+es": "Higiene", 9 | "#targeted+f": "100", 10 | "#targeted+m": "100" 11 | }, 12 | { 13 | "#adm1": "Cauca", 14 | "#country": "Colombia", 15 | "#date+reported": "", 16 | "#org+es": "OMS", 17 | "#sector+es": "Salud", 18 | "#subsector+es": "Vacunaci\u00f3n", 19 | "#targeted+f": "", 20 | "#targeted+m": "" 21 | }, 22 | { 23 | "#adm1": "Choc\u00f3", 24 | "#country": "Colombia", 25 | "#date+reported": "", 26 | "#org+es": "UNICEF", 27 | "#sector+es": "Educaci\u00f3n", 28 | "#subsector+es": "Formaci\u00f3n de ense\u00f1adores", 29 | "#targeted+f": "250", 30 | "#targeted+m": "300" 31 | }, 32 | { 33 | "#adm1": "Amazonas", 34 | "#country": "Venezuela", 35 | "#date+reported": "", 36 | "#org+es": "OMS", 37 | "#sector+es": "WASH", 38 | "#subsector+es": "Urbano", 39 | "#targeted+f": "80", 40 | "#targeted+m": "95" 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /tests/files/test_io/output-valid.csv: -------------------------------------------------------------------------------- 1 | Registro,Sector/Cluster,Subsector,Organización,Hombres,Mujeres,País,Departamento/Provincia/Estado, 2 | ,#sector+es,#subsector+es,#org+es,#targeted+f,#targeted+m,#country,#adm1,#date+reported 3 | 001,WASH,Higiene,ACNUR,100,100,Panamá,Los Santos,1 March 2015 4 | 002,Salud,Vacunación,OMS,,,Colombia,Cauca, 5 | 003,Educación,Formación de enseñadores,UNICEF,250,300,Colombia,Chocó, 6 | 004,WASH,Urbano,OMS,80,95,Venezuela,Amazonas, 7 | 
-------------------------------------------------------------------------------- /tests/files/test_io/output-valid.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["Registro", "Sector/Cluster", "Subsector", "Organizaci\u00f3n", "Hombres", "Mujeres", "Pa\u00eds", "Departamento/Provincia/Estado", null], 3 | ["", "#sector+es", "#subsector+es", "#org+es", "#targeted+f", "#targeted+m", "#country", "#adm1", "#date+reported"], 4 | ["001", "WASH", "Higiene", "ACNUR", "100", "100", "Panam\u00e1", "Los Santos", "1 March 2015"], 5 | ["002", "Salud", "Vacunaci\u00f3n", "OMS", "", "", "Colombia", "Cauca", ""], 6 | ["003", "Educaci\u00f3n", "Formaci\u00f3n de ense\u00f1adores", "UNICEF", "250", "300", "Colombia", "Choc\u00f3", ""], 7 | ["004", "WASH", "Urbano", "OMS", "80", "95", "Venezuela", "Amazonas", ""] 8 | ] 9 | -------------------------------------------------------------------------------- /tests/files/test_scripts/add-output-before.csv: -------------------------------------------------------------------------------- 1 | ,Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #date+reported,#sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | 2015-03-31,WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | 2015-03-31,WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | 2015-03-31,Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | 2015-03-31,Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | 2015-03-31,Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | 2015-03-31,Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | 2015-03-31,WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | 2015-03-31,WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/add-output-default.csv: 
-------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado, 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1,#date+reported 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos,2015-03-31 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos,2015-03-31 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca,2015-03-31 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca,2015-03-31 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó,2015-03-31 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó,2015-03-31 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas,2015-03-31 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas,2015-03-31 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/add-output-headers.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado,Report Date 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1,#date+reported 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos,2015-03-31 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos,2015-03-31 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca,2015-03-31 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca,2015-03-31 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó,2015-03-31 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó,2015-03-31 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas,2015-03-31 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas,2015-03-31 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/append-dataset.csv: -------------------------------------------------------------------------------- 1 | 
Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 12 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 13 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 14 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 15 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 16 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 17 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 18 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 19 | -------------------------------------------------------------------------------- /tests/files/test_scripts/clean-output-headers.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- 
/tests/files/test_scripts/clean-output-lower.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | wash,higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | wash,higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | salud,vacunación,OMS,Hombres,,Colombia,Cauca 6 | salud,vacunación,OMS,Mujeres,,Colombia,Cauca 7 | educación,formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | educación,formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | wash,urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | wash,urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/clean-output-noheaders.csv: -------------------------------------------------------------------------------- 1 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 2 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 3 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 4 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 5 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 6 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 7 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 8 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 9 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 10 | -------------------------------------------------------------------------------- /tests/files/test_scripts/clean-output-upper.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,HIGIENE,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,HIGIENE,ACNUR,Mujeres,100,Panamá,Los Santos 5 | 
SALUD,VACUNACIÓN,OMS,Hombres,,Colombia,Cauca 6 | SALUD,VACUNACIÓN,OMS,Mujeres,,Colombia,Cauca 7 | EDUCACIÓN,FORMACIÓN DE ENSEÑADORES,UNICEF,Hombres,250,Colombia,Chocó 8 | EDUCACIÓN,FORMACIÓN DE ENSEÑADORES,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,URBANO,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,URBANO,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/clean-output-whitespace-all.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/clean-output-whitespace-tags.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | 
WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/count-output-aggregated.csv: -------------------------------------------------------------------------------- 1 | Organización,Departamento/Provincia/Estado,Total targeted 2 | #org,#adm1,#targeted+total 3 | ACNUR,Los Santos,200 4 | OMS,Amazonas,175 5 | OMS,Cauca, 6 | UNICEF,Chocó,550 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/count-output-colspec.csv: -------------------------------------------------------------------------------- 1 | Organización,Departamento/Provincia/Estado,Activities 2 | #org,#adm1,#output+activities 3 | ACNUR,Los Santos,2 4 | OMS,Amazonas,2 5 | OMS,Cauca,2 6 | UNICEF,Chocó,2 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/count-output-simple.csv: -------------------------------------------------------------------------------- 1 | Organización,Departamento/Provincia/Estado,Count 2 | #org,#adm1,#meta+count 3 | ACNUR,Los Santos,2 4 | OMS,Amazonas,2 5 | OMS,Cauca,2 6 | UNICEF,Chocó,2 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/cut-output-excludes.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#country,#adm1 3 | WASH,Higiene,ACNUR,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Colombia,Cauca 6 | Salud,Vacunación,OMS,Colombia,Cauca 7 | Educación,Formación de enseñadores,UNICEF,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Colombia,Chocó 9 | WASH,Urbano,OMS,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- 
/tests/files/test_scripts/cut-output-includes.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Organización,Departamento/Provincia/Estado 2 | #sector,#org,#adm1 3 | WASH,ACNUR,Los Santos 4 | WASH,ACNUR,Los Santos 5 | Salud,OMS,Cauca 6 | Salud,OMS,Cauca 7 | Educación,UNICEF,Chocó 8 | Educación,UNICEF,Chocó 9 | WASH,OMS,Amazonas 10 | WASH,OMS,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/input-date.csv: -------------------------------------------------------------------------------- 1 | Date,Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #date+reported,#sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | 1 March 2015,WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | 03-18-14,WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | 2015-03-01,Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | 2015-03-02,Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | 2015-03-10,Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | 2015-03-11,Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | 19/3/15,WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | "March 31, 2015",WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/input-merge.csv: -------------------------------------------------------------------------------- 1 | Sector,Current status 2 | #sector,#status 3 | WASH,activo 4 | Salud,inactivo 5 | Educación, -------------------------------------------------------------------------------- /tests/files/test_scripts/input-simple.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | 
#sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/input-status.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado,Current status 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1,#status 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos, 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos, 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca,activo 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca,activo 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó,inactivo 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó,inactivo 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas,activo 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas,activo 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/input-untagged.csv: -------------------------------------------------------------------------------- 1 | Organisation,Cluster,Country,Subdivision 2 | ACNUR,WASH,Panamá,Los Santos 3 | OMS,Health,Colombia,Cauca 4 | UNICEF,Education,Colombia,Chocó 5 | OMS,WASH,Venezuela,Amazonas 6 | -------------------------------------------------------------------------------- /tests/files/test_scripts/input-whitespace.csv: 
-------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH, Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación ,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/merge-output-basic.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado,Current status 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1,#status 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos,activo 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos,activo 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca,inactivo 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca,inactivo 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó, 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó, 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas,activo 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas,activo 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/merge-output-overwrite.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado,Current status 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1,#status 3 | 
WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos,activo 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos,activo 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca,inactivo 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca,inactivo 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó, 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó, 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas,activo 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas,activo 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/merge-output-replace.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado,Current status 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1,#status 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos,activo 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos,activo 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca,activo 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca,activo 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó,inactivo 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó,inactivo 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas,activo 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas,activo 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/rename-output-header.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Affected,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#affected,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | 
Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/rename-output-multiple.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#funding,#population+sex,#affected,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/rename-output-single.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#affected,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- 
/tests/files/test_scripts/select-output-eq.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 6 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/select-output-ge.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 6 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/select-output-gt.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 4 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 5 | -------------------------------------------------------------------------------- /tests/files/test_scripts/select-output-le.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | 
#sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 8 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 9 | -------------------------------------------------------------------------------- /tests/files/test_scripts/select-output-lt.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 8 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 9 | -------------------------------------------------------------------------------- /tests/files/test_scripts/select-output-multiple.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 8 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 9 | -------------------------------------------------------------------------------- /tests/files/test_scripts/select-output-ne.csv: -------------------------------------------------------------------------------- 1 | 
Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 4 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 5 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 6 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/select-output-nre.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 4 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 5 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 6 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/select-output-re.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 4 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 5 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 6 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/select-output-reverse.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | 
Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 4 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 5 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 6 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/sort-output-date.csv: -------------------------------------------------------------------------------- 1 | Date,Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #date+reported,#sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | 03-18-14,WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 4 | 1 March 2015,WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 5 | 2015-03-01,Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | 2015-03-02,Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | 2015-03-10,Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | 2015-03-11,Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | 19/3/15,WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | "March 31, 2015",WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/sort-output-default.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 4 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 5 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 6 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 7 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 8 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | 
WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/sort-output-numeric.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 4 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 5 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 6 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 7 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 8 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 9 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 10 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/sort-output-reverse.csv: -------------------------------------------------------------------------------- 1 | Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 4 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 5 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 6 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 7 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 8 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 9 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 10 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/sort-output-tags.csv: -------------------------------------------------------------------------------- 1 | 
Sector/Cluster,Subsector,Organización,Sex,Targeted,País,Departamento/Provincia/Estado 2 | #sector,#subsector,#org,#population+sex,#targeted,#country,#adm1 3 | Salud,Vacunación,OMS,Hombres,,Colombia,Cauca 4 | Salud,Vacunación,OMS,Mujeres,,Colombia,Cauca 5 | Educación,Formación de enseñadores,UNICEF,Hombres,250,Colombia,Chocó 6 | Educación,Formación de enseñadores,UNICEF,Mujeres,300,Colombia,Chocó 7 | WASH,Higiene,ACNUR,Hombres,100,Panamá,Los Santos 8 | WASH,Higiene,ACNUR,Mujeres,100,Panamá,Los Santos 9 | WASH,Urbano,OMS,Hombres,80,Venezuela,Amazonas 10 | WASH,Urbano,OMS,Mujeres,95,Venezuela,Amazonas 11 | -------------------------------------------------------------------------------- /tests/files/test_scripts/tag-output-ambiguous.csv: -------------------------------------------------------------------------------- 1 | Organisation,Cluster,Country,Subdivision 2 | #org,,,#adm1 3 | ACNUR,WASH,Panamá,Los Santos 4 | OMS,Health,Colombia,Cauca 5 | UNICEF,Education,Colombia,Chocó 6 | OMS,WASH,Venezuela,Amazonas 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/tag-output-default.csv: -------------------------------------------------------------------------------- 1 | Organisation,Cluster,Country,Subdivision 2 | #org,#meta,#meta,#meta 3 | ACNUR,WASH,Panamá,Los Santos 4 | OMS,Health,Colombia,Cauca 5 | UNICEF,Education,Colombia,Chocó 6 | OMS,WASH,Venezuela,Amazonas 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/tag-output-full.csv: -------------------------------------------------------------------------------- 1 | Organisation,Cluster,Country,Subdivision 2 | #org,#sector,#country,#adm1 3 | ACNUR,WASH,Panamá,Los Santos 4 | OMS,Health,Colombia,Cauca 5 | UNICEF,Education,Colombia,Chocó 6 | OMS,WASH,Venezuela,Amazonas 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/tag-output-notsubstrings.csv: 
-------------------------------------------------------------------------------- 1 | Organisation,Cluster,Country,Subdivision 2 | ,#sector,, 3 | ACNUR,WASH,Panamá,Los Santos 4 | OMS,Health,Colombia,Cauca 5 | UNICEF,Education,Colombia,Chocó 6 | OMS,WASH,Venezuela,Amazonas 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/tag-output-partial.csv: -------------------------------------------------------------------------------- 1 | Organisation,Cluster,Country,Subdivision 2 | ,#sector,, 3 | ACNUR,WASH,Panamá,Los Santos 4 | OMS,Health,Colombia,Cauca 5 | UNICEF,Education,Colombia,Chocó 6 | OMS,WASH,Venezuela,Amazonas 7 | -------------------------------------------------------------------------------- /tests/files/test_scripts/validation-schema-invalid.csv: -------------------------------------------------------------------------------- 1 | #valid_tag,#valid_datatype 2 | #sector,number 3 | -------------------------------------------------------------------------------- /tests/files/test_scripts/validation-schema-valid.csv: -------------------------------------------------------------------------------- 1 | #valid_tag,#valid_datatype 2 | #sector,text 3 | -------------------------------------------------------------------------------- /tests/files/test_validation/truthy-schema.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "#valid_tag": "#adm2+code", 4 | "#valid_severity": "error", 5 | "#valid_required": true 6 | } 7 | ] 8 | -------------------------------------------------------------------------------- /tests/test_converters.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Unit tests for converters 4 | David Megginson 5 | June 2016 6 | 7 | License: Public Domain 8 | """ 9 | 10 | import unittest 11 | import hxl 12 | from . 
import resolve_path 13 | 14 | 15 | class TaggerTest(unittest.TestCase): 16 | """Unit tests for hxl.converters.Tagger""" 17 | 18 | UNTAGGED = [ 19 | ['Country Name', 'Country Code', '2016', '2015', '2014', '2013', '2012'], 20 | ['Sudan', 'SUD', '10000', '8500', '9000', '7500', '6000'], 21 | ['Syria', 'SYR', '100000', '85000', '90000', '75000', '60000'], 22 | ['Yemen', 'YEM', '50000', '43000', '45000', '38000', '30000'] 23 | ] 24 | 25 | EXPECTED_TAGS_SIMPLE = ['#country+name', '#country+code', '', '', '', '', ''] 26 | 27 | EXPECTED_TAGS_DEFAULT = ['#country+name', '#country+code', '#targeted', '#targeted', '#targeted', '#targeted', '#targeted'] 28 | 29 | def setUp(self): 30 | pass 31 | 32 | def test_basic(self): 33 | """Basic tagging operation.""" 34 | tagging_specs = [('Country Name', '#country+name'), ('Country Code', '#country+code')] 35 | source = hxl.tagger(self.UNTAGGED, tagging_specs) 36 | self.assertEqual(self.EXPECTED_TAGS_SIMPLE, source.display_tags) 37 | 38 | def test_case_insensitive(self): 39 | """Test that the tagger is case-insensitive.""" 40 | tagging_specs = [('country name', '#country+name'), ('code', '#country+code')] 41 | source = hxl.data(hxl.converters.Tagger(self.UNTAGGED, tagging_specs)) 42 | self.assertEqual(self.EXPECTED_TAGS_SIMPLE, source.display_tags) 43 | 44 | def test_space_insensitive(self): 45 | """Test that the tagger is whitespace-insensitive.""" 46 | tagging_specs = [(' Country Name', '#country+name'), ('Country Code ', '#country+code')] 47 | source = hxl.data(hxl.converters.Tagger(self.UNTAGGED, tagging_specs)) 48 | self.assertEqual(self.EXPECTED_TAGS_SIMPLE, source.display_tags) 49 | 50 | def test_partial_match(self): 51 | """Test for substrings.""" 52 | tagging_specs = [('name', '#country+name'), ('code', '#country+code')] 53 | source = hxl.data(hxl.converters.Tagger(self.UNTAGGED, tagging_specs)) 54 | self.assertEqual(self.EXPECTED_TAGS_SIMPLE, source.display_tags) 55 | 56 | def test_full_match(self): 57 | """Test for full 
match.""" 58 | tagging_specs = [('country name', '#country+name'), ('code', '#country+code')] 59 | source = hxl.data(hxl.converters.Tagger(self.UNTAGGED, tagging_specs, match_all=True)) 60 | self.assertEqual(['#country+name', '', '', '', '', '', ''], source.display_tags) 61 | 62 | def test_default_tag(self): 63 | """Test for default tag.""" 64 | tagging_specs = [('Country Name', '#country+name'), ('Country Code', '#country+code')] 65 | source = hxl.data(hxl.converters.Tagger(self.UNTAGGED, tagging_specs, default_tag='#targeted')) 66 | self.assertEqual(self.EXPECTED_TAGS_DEFAULT, source.display_tags) 67 | 68 | def test_wide_data(self): 69 | """Test for very wide data""" 70 | tagging_specs = [ 71 | ('cod_wardsr', '#adm3+code',), 72 | ('food_monthly', '#value+expenditure+food_monthly',), 73 | ] 74 | filename = resolve_path("files/test_converters/wide-tagging-test.csv") 75 | source = hxl.data(hxl.converters.Tagger(hxl.input.make_input(filename, hxl.input.InputOptions(allow_local=True)), tagging_specs)).cache() 76 | self.assertTrue('#value+expenditure+food_monthly' in source.display_tags) 77 | 78 | -------------------------------------------------------------------------------- /tests/test_datatypes.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests for the hxl.datatypes module 3 | David Megginson 4 | April 2018 5 | 6 | License: Public Domain 7 | """ 8 | 9 | import hxl.datatypes, unittest 10 | 11 | 12 | class TestStrings(unittest.TestCase): 13 | 14 | def test_empty(self): 15 | self.assertTrue(hxl.datatypes.is_empty(None)) 16 | self.assertTrue(hxl.datatypes.is_empty('')) 17 | self.assertTrue(hxl.datatypes.is_empty(' ')) 18 | self.assertTrue(hxl.datatypes.is_empty(" \t\r\n ")) 19 | 20 | def test_not_empty(self): 21 | self.assertFalse(hxl.datatypes.is_empty(0)) 22 | self.assertFalse(hxl.datatypes.is_empty('0')) 23 | self.assertFalse(hxl.datatypes.is_empty(' x ')) 24 | 25 | def test_normalise(self): 26 | 
self.assertEqual('', hxl.datatypes.normalise_string(None)) 27 | self.assertEqual('', hxl.datatypes.normalise_string(' ')) 28 | self.assertEqual('3.0', hxl.datatypes.normalise_string(3.0)) 29 | self.assertEqual('foo', hxl.datatypes.normalise_string(' FoO ')) 30 | self.assertEqual('foo bar', hxl.datatypes.normalise_string(" FOO \r\n bAr ")) 31 | 32 | class TestNumbers(unittest.TestCase): 33 | 34 | def test_is_number(self): 35 | self.assertTrue(hxl.datatypes.is_number(1)) 36 | self.assertTrue(hxl.datatypes.is_number(' 1 ')) 37 | self.assertTrue(hxl.datatypes.is_number(1.1)) 38 | self.assertTrue(hxl.datatypes.is_number(' 1.1 ')) 39 | self.assertTrue(hxl.datatypes.is_number(-1)) 40 | self.assertTrue(hxl.datatypes.is_number('-1')) 41 | self.assertTrue(hxl.datatypes.is_number('2.1e10')) 42 | 43 | def test_not_number(self): 44 | self.assertFalse(hxl.datatypes.is_number('1x')) 45 | 46 | def test_normalise(self): 47 | self.assertEqual(1, hxl.datatypes.normalise_number(1.0)) 48 | self.assertEqual(1, hxl.datatypes.normalise_number('1.0')) 49 | self.assertEqual(1.1, hxl.datatypes.normalise_number(1.1)) 50 | self.assertEqual(1.1, hxl.datatypes.normalise_number('1.1')) 51 | 52 | def test_normalise_exception(self): 53 | seen_exception = False 54 | try: 55 | hxl.datatypes.normalise_number('foo') 56 | except ValueError: 57 | seen_exception = True 58 | self.assertTrue(seen_exception) 59 | 60 | class TestDates(unittest.TestCase): 61 | 62 | def test_is_iso_date(self): 63 | self.assertTrue(hxl.datatypes.is_date('2018')) 64 | self.assertTrue(hxl.datatypes.is_date(' 2018 ')) 65 | self.assertTrue(hxl.datatypes.is_date('2018W2')) 66 | self.assertTrue(hxl.datatypes.is_date('2018-03')) 67 | self.assertTrue(hxl.datatypes.is_date('2018-03-01')) 68 | 69 | # ISO edge cases 70 | self.assertFalse(hxl.datatypes.is_date('2018-04-31')) 71 | self.assertFalse(hxl.datatypes.is_date('2018-13-01')) 72 | self.assertFalse(hxl.datatypes.is_date('2018W54')) 73 | 74 | def test_iso_datetime(self): 75 | 
self.assertTrue(hxl.datatypes.is_date("2011-01-01T00:00:00.000Z")) 76 | self.assertEqual('2011-01-01', hxl.datatypes.normalise_date("2011-01-01T00:00:00.000Z")) 77 | 78 | def test_sql_datetime(self): 79 | self.assertTrue(hxl.datatypes.is_date('2017-12-01 00:00:00')) 80 | self.assertEqual('2017-12-01', hxl.datatypes.normalise_date('2017-12-01 00:00:00')) 81 | 82 | def test_rfc822_datetime(self): 83 | self.assertTrue(hxl.datatypes.is_date("30 May 2018 02:57:50 GMT")) 84 | self.assertTrue(hxl.datatypes.is_date("Thu, 30 May 2018 02:57:50 GMT")) 85 | self.assertEqual('2018-05-30', hxl.datatypes.normalise_date("Thu, 30 May 2018 02:57:50 GMT")) 86 | 87 | def test_normalise_date_dayfirst(self): 88 | self.assertEqual('2018-11-10', hxl.datatypes.normalise_date('10-11-18', dayfirst=True)) 89 | self.assertEqual('2018-10-11', hxl.datatypes.normalise_date('10-11-18', dayfirst=False)) 90 | 91 | def test_is_quarter(self): 92 | self.assertTrue(hxl.datatypes.is_date('2018Q2')) 93 | self.assertFalse(hxl.datatypes.is_date('2018Q5')) 94 | 95 | def test_is_non_iso_date(self): 96 | self.assertTrue(hxl.datatypes.is_date('Feb 2/17')) 97 | self.assertTrue(hxl.datatypes.is_date('Feb 2 17')) 98 | self.assertTrue(hxl.datatypes.is_date('Feb 2 2017')) 99 | self.assertTrue(hxl.datatypes.is_date('12 June 2017')) 100 | 101 | def test_not_date(self): 102 | self.assertFalse(hxl.datatypes.is_date('Feb Feb 2017')) 103 | self.assertFalse(hxl.datatypes.is_date('13.13.2017')) 104 | 105 | def test_normalise_iso_date(self): 106 | self.assertEqual('2008', hxl.datatypes.normalise_date('2008')) 107 | self.assertEqual('2008-01', hxl.datatypes.normalise_date('2008-01')) 108 | self.assertEqual('2008-01', hxl.datatypes.normalise_date('2008-1')) 109 | self.assertEqual('2008-01-01', hxl.datatypes.normalise_date('2008-01-01')) 110 | self.assertEqual('2008-01-01', hxl.datatypes.normalise_date('2008-1-1')) 111 | self.assertEqual('2008W01', hxl.datatypes.normalise_date('2008w1')) 112 | self.assertEqual('2008Q1', 
hxl.datatypes.normalise_date('2008q1')) 113 | 114 | def test_normalise_other_date(self): 115 | self.assertEqual('2008-01-20', hxl.datatypes.normalise_date('Jan 20, 2008')) 116 | self.assertEqual('2008-01-20', hxl.datatypes.normalise_date('01-20-2008')) 117 | self.assertEqual('2008-01-20', hxl.datatypes.normalise_date('20-01-2008')) 118 | self.assertEqual('2008-01', hxl.datatypes.normalise_date('Jan 2008')) 119 | 120 | def test_partial_dates(self): 121 | # Year alone is OK 122 | self.assertTrue(hxl.datatypes.normalise_date('2018')) 123 | 124 | # Month alone is OK (supply current year) 125 | self.assertTrue(hxl.datatypes.normalise_date('July')) 126 | 127 | # Day alone is not OK 128 | with self.assertRaises(ValueError): 129 | hxl.datatypes.normalise_date('30') 130 | 131 | def test_epoch_seconds(self): 132 | """ Seconds since epoch """ 133 | self.assertEqual('2022-09-26', hxl.datatypes.normalise_date('1664212110')) 134 | 135 | def test_epoch_days(self): 136 | """ Days since epoch """ 137 | self.assertEqual('2016-07-17', hxl.datatypes.normalise_date('17000')) 138 | 139 | 140 | class TestFlatten(unittest.TestCase): 141 | 142 | def test_none(self): 143 | self.assertEqual('', hxl.datatypes.flatten(None)) 144 | 145 | def test_number(self): 146 | self.assertEqual("3", hxl.datatypes.flatten(3)) 147 | 148 | def test_string(self): 149 | self.assertEqual("xxx", hxl.datatypes.flatten("xxx")) 150 | 151 | def test_list(self): 152 | input = ['a', 'b', ['c', 'd'], 'e'] 153 | output = '["a", "b", ["c", "d"], "e"]' 154 | self.assertEqual(output, hxl.datatypes.flatten(input)) 155 | 156 | def test_non_json(self): 157 | input = ['a', 'b', ['c', 'd'], 'e'] 158 | output = 'a | b | c | d | e' 159 | self.assertEqual(output, hxl.datatypes.flatten(input, use_json=False)) 160 | 161 | def test_dict(self): 162 | input = {'a': 'b', 'c': ['d', 'e']} 163 | output = '{"a": "b", "c": ["d", "e"]}' 164 | self.assertEqual(output, hxl.datatypes.flatten(input)) 165 | 
-------------------------------------------------------------------------------- /tests/test_formulas.py: -------------------------------------------------------------------------------- 1 | """Unit tests for hxl.formulas.functions 2 | """ 3 | 4 | import unittest 5 | import hxl.model, datetime 6 | import hxl.formulas.functions as f, hxl.formulas.parser as p, hxl.formulas.lexer as l, hxl.formulas.eval as e 7 | 8 | 9 | class TestFunctions(unittest.TestCase): 10 | """Test the hxl.formulas.functions class""" 11 | 12 | TAGS = ["#org", "#adm1", "#affected+f+children", "#affected+m+children", "#affected+f+adults", "#affected+m+adults"] 13 | DATA = ["Org A", "Coast Region", "100", "200", "300", "400"] 14 | 15 | def setUp(self): 16 | columns = [hxl.model.Column.parse(tag) for tag in self.TAGS] 17 | self.row = hxl.model.Row(columns=columns, values=self.DATA) 18 | 19 | def test_add(self): 20 | 21 | # integers 22 | result = f.add(self.row, ['2', '3']) 23 | self.assertEqual(5, result) 24 | 25 | # float and integer 26 | result = f.add(self.row, ['2', '3.5']) 27 | self.assertEqual(5.5, result) 28 | 29 | # two tag patterns 30 | # should take only first match for each tag pattern 31 | result = f.add( 32 | self.row, 33 | map(hxl.model.TagPattern.parse, ['#affected+f', '#affected+m']) 34 | ) 35 | self.assertEqual(300, result) 36 | 37 | # tag pattern and integer 38 | result = f.add(self.row, [ 39 | hxl.model.TagPattern.parse('#affected+f'), 40 | '150' 41 | ]) 42 | self.assertEqual(250, result) 43 | 44 | # ignore strings 45 | result = f.add(self.row, [ 46 | hxl.model.TagPattern.parse('#org'), 47 | '150' 48 | ]) 49 | self.assertEqual(150, result) 50 | 51 | def test_subtract(self): 52 | 53 | # integers 54 | result = f.subtract(self.row, ['2', '3']) 55 | self.assertEqual(-1, result) 56 | 57 | # float and integer 58 | result = f.subtract(self.row, ['4', '3.5']) 59 | self.assertEqual(0.5, result) 60 | 61 | # two tag patterns 62 | # should take only first match for each tag pattern 63 | 
result = f.subtract( 64 | self.row, 65 | map(hxl.model.TagPattern.parse, ['#affected+m', '#affected+f']) 66 | ) 67 | self.assertEqual(100, result) 68 | 69 | # tag pattern and integer 70 | result = f.subtract(self.row, [ 71 | hxl.model.TagPattern.parse('#affected+f'), 72 | '50' 73 | ]) 74 | self.assertEqual(50, result) 75 | 76 | def test_multiply(self): 77 | 78 | # integers 79 | result = f.multiply(self.row, ['2', '3']) 80 | self.assertEqual(6, result) 81 | 82 | # float and integer 83 | result = f.multiply(self.row, ['4', '3.5']) 84 | self.assertEqual(14, result) 85 | 86 | # two tag patterns 87 | # should take only first match for each tag pattern 88 | result = f.multiply( 89 | self.row, 90 | map(hxl.model.TagPattern.parse, ['#affected+m', '#affected+f']) 91 | ) 92 | self.assertEqual(20000, result) 93 | 94 | # tag pattern and integer 95 | result = f.multiply(self.row, [ 96 | hxl.model.TagPattern.parse('#affected+f'), 97 | '50' 98 | ]) 99 | self.assertEqual(5000, result) 100 | 101 | def test_divide(self): 102 | 103 | # integers 104 | result = f.divide(self.row, ['4', '2']) 105 | self.assertEqual(2, result) 106 | 107 | # float and integer 108 | result = f.divide(self.row, ['6', '1.5']) 109 | self.assertEqual(4, result) 110 | 111 | # two tag patterns 112 | # should take only first match for each tag pattern 113 | result = f.divide( 114 | self.row, 115 | map(hxl.model.TagPattern.parse, ['#affected+m', '#affected+f']) 116 | ) 117 | self.assertEqual(2, result) 118 | 119 | # tag pattern and integer 120 | result = f.divide(self.row, [ 121 | hxl.model.TagPattern.parse('#affected+f'), 122 | '50' 123 | ]) 124 | self.assertEqual(2, result) 125 | 126 | # avoid DIV0 127 | result = f.divide(self.row, ['100', '0']) 128 | self.assertEqual('NaN', result) 129 | 130 | # ignore strings 131 | result = f.divide(self.row, [ 132 | '150', 133 | hxl.model.TagPattern.parse('#org') 134 | ]) 135 | self.assertEqual('NaN', result) 136 | 137 | def test_modulo(self): 138 | 139 | # integers 140 | 
result = f.modulo(self.row, ['4', '2']) 141 | self.assertEqual(0, result) 142 | 143 | # float and integer 144 | result = f.modulo(self.row, ['5', '1.5']) 145 | self.assertEqual(0.5, result) 146 | 147 | # two tag patterns 148 | # should take only first match for each tag pattern 149 | result = f.modulo( 150 | self.row, 151 | map(hxl.model.TagPattern.parse, ['#affected+adults', '#affected+m']) 152 | ) 153 | self.assertEqual(100, result) # 300 % 200 154 | 155 | # tag pattern and integer 156 | result = f.modulo(self.row, [ 157 | hxl.model.TagPattern.parse('#affected+f'), 158 | '70' 159 | ]) 160 | self.assertEqual(30, result) # 100 % 70 161 | 162 | # avoid DIV0 163 | result = f.modulo(self.row, ['100', '0']) 164 | self.assertEqual(100, result) # 100 % 0 - ignore the 0 165 | 166 | # ignore strings 167 | result = f.modulo(self.row, [ 168 | '150', 169 | hxl.model.TagPattern.parse('#org') 170 | ]) 171 | self.assertEqual(150, result) # 150 % "Org A" - ignore the string 172 | 173 | def test_sum(self): 174 | 175 | # should take all matches for each tag pattern 176 | result = f.FUNCTIONS['sum']( 177 | self.row, 178 | [hxl.model.TagPattern.parse('#affected'), '100'], 179 | True 180 | ) 181 | self.assertEqual(1100, result) 182 | 183 | def test_product(self): 184 | 185 | # should take all matches for each tag pattern 186 | result = f.FUNCTIONS['product']( 187 | self.row, 188 | [hxl.model.TagPattern.parse('#affected'), '100'], 189 | True 190 | ) 191 | self.assertEqual(240000000000, result) 192 | 193 | def test_min(self): 194 | result = f.FUNCTIONS['min']( 195 | self.row, 196 | [hxl.model.TagPattern.parse('#affected')] 197 | ) 198 | self.assertEqual(100, result) 199 | 200 | def test_max(self): 201 | result = f.FUNCTIONS['max']( 202 | self.row, 203 | [hxl.model.TagPattern.parse('#affected')] 204 | ) 205 | self.assertEqual(400, result) 206 | 207 | def test_average(self): 208 | result = f.FUNCTIONS['average']( 209 | self.row, 210 | [hxl.model.TagPattern.parse('#affected')] 211 | ) 212 
| self.assertEqual(250, result) 213 | 214 | def test_join(self): 215 | result = f.FUNCTIONS['join']( 216 | self.row, 217 | ['|', hxl.model.TagPattern.parse('#affected')] 218 | ) 219 | self.assertEqual('100|200|300|400', result) 220 | 221 | def test_today(self): 222 | F = f.FUNCTIONS['today'] 223 | today = datetime.datetime.utcnow().strftime("%Y-%m-%d") 224 | self.assertEqual(today, F(self.row, [])) 225 | 226 | def test_datedif(self): 227 | F = f.FUNCTIONS['datedif'] 228 | self.assertEqual(1, F( 229 | self.row, 230 | ['2018-01-01', '2019-03-01', 'Y'] 231 | )) 232 | self.assertEqual(2, F( 233 | self.row, 234 | ['2018-01-01', '2018-03-01', 'M'] 235 | )) 236 | self.assertEqual(5, F( 237 | self.row, 238 | ['2018-01-01', '2018-02-05', 'W'] 239 | )) 240 | self.assertEqual(31, F( 241 | self.row, 242 | ['2018-01-01', '2018-02-01', 'D'] 243 | )) 244 | 245 | def test_toupper(self): 246 | result = f.FUNCTIONS['toupper']( 247 | self.row, 248 | [hxl.model.TagPattern.parse('#adm1'), 'Coast Region'], 249 | False 250 | ) 251 | self.assertEqual('COAST REGION', result) 252 | 253 | def test_tolower(self): 254 | result = f.FUNCTIONS['tolower']( 255 | self.row, 256 | [hxl.model.TagPattern.parse('#adm1'), 'Coast Region'], 257 | False 258 | ) 259 | self.assertEqual('coast region', result) 260 | 261 | def test_embedded(self): 262 | 263 | result = f.multiply(self.row, [ 264 | [f.add, ['1', '2']], 265 | '3' 266 | ]) 267 | self.assertEqual(9, result) 268 | 269 | 270 | class TestParser(unittest.TestCase): 271 | """Test the hxl.formulas.lexer class""" 272 | 273 | def setUp(self): 274 | pass 275 | 276 | def parse(self, s): 277 | return p.parser.parse(s, lexer=l.lexer) 278 | 279 | def test_constants(self): 280 | self.assertEqual([f.const, [1]], self.parse("1")) 281 | self.assertEqual([f.const, [1.1]], self.parse("1.1")) 282 | self.assertEqual([f.const, ['foo']], self.parse('"foo"')) 283 | self.assertEqual([f.const, ["foo\tfoo"]], self.parse('"foo\\tfoo"')) 284 | 285 | def test_simple_math(self): 
286 | self.assertEqual([f.add, [[f.const, [1]], [f.const, [1]]]], self.parse("1 + 1")) 287 | 288 | def test_groups(self): 289 | self.assertEqual( 290 | [f.multiply, [[f.const, [2]], [f.add, [[f.const, [1]], [f.const, [1]]]]]], 291 | self.parse("2 * (1 + 1)") 292 | ) 293 | 294 | def test_functions(self): 295 | self.assertEqual( 296 | [f.function, ['sum', [f.const, [1]], [f.const, [2]], [f.const, [3]]]], 297 | self.parse("sum(1, 2, 3)") 298 | ) 299 | 300 | class TestEval(unittest.TestCase): 301 | 302 | TAGS = ["#org", "#adm1", "#affected+f+children", "#affected+m+children", "#affected+f+adults", "#affected+m+adults"] 303 | DATA = ["Org A", "Coast Region", "100", "200", "300", "400"] 304 | 305 | def setUp(self): 306 | columns = [hxl.model.Column.parse(tag) for tag in self.TAGS] 307 | self.row = hxl.model.Row(columns=columns, values=self.DATA) 308 | 309 | def test_constant(self): 310 | self.assertEqual(10, e.eval(self.row, '10')) 311 | 312 | def test_simple(self): 313 | self.assertEqual(2, e.eval(self.row, '1 + 1')) 314 | 315 | def test_non_existant_tag(self): 316 | # non-existant should be zero in numeric calculations 317 | self.assertEqual(0, e.eval(self.row, "#xxx * 100")) 318 | 319 | def test_string_in_calc(self): 320 | self.assertEqual(100, e.eval(self.row, "#org + #affected+f+children")) 321 | self.assertEqual(0, e.eval(self.row, "#org * #affected+f+children")) 322 | 323 | def test_div0(self): 324 | self.assertEqual('NaN', e.eval(self.row, '#affected+m+children / 0')) 325 | self.assertEqual('NaN', e.eval(self.row, '#affected+m+children / #org')) 326 | 327 | def test_order_of_operations(self): 328 | self.assertEqual(7, e.eval(self.row, '1 + 2 * 3')) 329 | self.assertEqual(20100, e.eval(self.row, '#affected+f+children + #affected+m+children * 100')) 330 | self.assertEqual(30000, e.eval(self.row, '(#affected+f+children + #affected+m+children) * 100')) 331 | 332 | def test_complex_results(self): 333 | self.assertEqual(50, e.eval(self.row, '#affected+m+children / 
#affected+m+adults * 100')) 334 | 335 | def test_tagpatterns(self): 336 | self.assertEqual(300, e.eval(self.row, '#affected+f+children + #affected+m+children')) 337 | 338 | def test_min_function(self): 339 | self.assertEqual(100, e.eval(self.row, 'min(#affected)')) 340 | 341 | def test_max_function(self): 342 | self.assertEqual(400, e.eval(self.row, 'max(#affected)')) 343 | 344 | def test_round_function(self): 345 | self.assertEqual(3, e.eval(self.row, 'round(3.4)')) 346 | self.assertEqual(66.7, e.eval(self.row, 'round(#affected+m+children / #affected+f+adults * 1000) / 10')) 347 | 348 | def test_datedif_function(self): 349 | columns = [hxl.model.Column.parse(tag) for tag in ['#date+start', '#date+end']] 350 | row = hxl.model.Row(columns=columns, values=['2018-01-01', '2018-02-03']) 351 | self.assertEqual(5, e.eval(row, 'datedif(#date+start, #date+end, "W")')) 352 | 353 | def test_toupper_function(self): 354 | self.assertEqual("COAST REGION", e.eval(self.row, 'toupper(#adm1)')) 355 | 356 | def test_tolower_function(self): 357 | self.assertEqual("coast region", e.eval(self.row, 'tolower(#adm1)')) 358 | 359 | def test_nested_functions(self): 360 | self.assertEqual(5, e.eval(self.row, 'round(round(3.4) + round(1.9))')) 361 | 362 | 363 | 364 | -------------------------------------------------------------------------------- /tests/test_geo.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests for the hxl.geo module 3 | David Megginson 4 | February 2018 5 | 6 | License: Public Domain 7 | """ 8 | 9 | import hxl, unittest 10 | 11 | class TestLatLon(unittest.TestCase): 12 | 13 | LATITUDE_SAMPLES = ( 14 | ('45.5', 45.5), 15 | ('45N30', 45.5), 16 | ('N45:30:30', 45.508333,), 17 | ('N45°30\' 30"', 45.508333,), 18 | ('45°N30\' 30"', 45.508333,), 19 | ('S 45 30.5', -45.508333,), 20 | ('1°17′S', -1.283333), 21 | ('N 46° 12′ 0″', 46.2), 22 | ) 23 | 24 | LONGITUDE_SAMPLES = ( 25 | ('-75.5', -75.5), 26 | ('75W30', -75.5), 27 | 
('W75:30:30', -75.508333,), 28 | ('W75°30\' 30\"', -75.508333,), 29 | ('75°W30\' 30\"', -75.508333,), 30 | ('W 75 30.5', -75.508333,), 31 | ('36°49′E', 36.816667), 32 | ('E 6° 9′ 0″', 6.15), 33 | ) 34 | 35 | COORDINATE_SAMPLES = ( 36 | ('45.5,-75.5', (45.5, -75.5),), 37 | ('45N30 / 75W30', (45.5, -75.5),), 38 | ('N45:30:30;W75:30:30', (45.508333, -75.508333,),), 39 | ('N45.5,W75.5', (45.5, -75.5)), 40 | ('1°17′S 36°49′E', (-1.283333, 36.816667)), 41 | ('N 46° 12′ 0″, E 6° 9′ 0″', (46.2, 6.15,)), 42 | ) 43 | 44 | def test_parse_lat(self): 45 | for s, n in self.LATITUDE_SAMPLES: 46 | lat = hxl.geo.parse_lat(s) 47 | self.assertIsNotNone(lat) 48 | self.assertAlmostEqual(n, lat, places=6) 49 | 50 | def test_parse_lon(self): 51 | for s, n in self.LONGITUDE_SAMPLES: 52 | lon = hxl.geo.parse_lon(s) 53 | self.assertIsNotNone(lon) 54 | self.assertAlmostEqual(n, lon , places=6) 55 | 56 | def test_parse_coord(self): 57 | for s, c in self.COORDINATE_SAMPLES: 58 | coord = hxl.geo.parse_coord(s) 59 | self.assertIsNotNone(coord) 60 | self.assertAlmostEqual(c[0], coord[0], places=6) 61 | self.assertAlmostEqual(c[1], coord[1], places=6) 62 | 63 | def test_lat_out_of_range(self): 64 | with self.assertRaises(ValueError): 65 | hxl.geo.parse_lat('91 00 00') 66 | with self.assertRaises(ValueError): 67 | hxl.geo.parse_lat('-91 00 00') 68 | with self.assertRaises(ValueError): 69 | hxl.geo.parse_lat('45 60 00') 70 | with self.assertRaises(ValueError): 71 | hxl.geo.parse_lat('45 00 60') 72 | 73 | def test_lon_out_of_range(self): 74 | with self.assertRaises(ValueError): 75 | hxl.geo.parse_lon('181 00 00') 76 | with self.assertRaises(ValueError): 77 | hxl.geo.parse_lon('-181 00 00') 78 | with self.assertRaises(ValueError): 79 | hxl.geo.parse_lon('-75 60 00') 80 | with self.assertRaises(ValueError): 81 | hxl.geo.parse_lon('-75 00 60') 82 | 83 | def test_coord_out_of_range(self): 84 | with self.assertRaises(ValueError): 85 | hxl.geo.parse_coord('45.5,181.5') 86 | 
-------------------------------------------------------------------------------- /tests/test_scripts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests for the hxl.scripts module 3 | David Megginson 4 | December 2014 5 | 6 | License: Public Domain 7 | """ 8 | 9 | from __future__ import print_function 10 | 11 | import unittest 12 | import os 13 | import sys 14 | import subprocess 15 | import filecmp 16 | import difflib 17 | import tempfile 18 | 19 | import hxl 20 | import hxl.scripts 21 | 22 | root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), os.pardir)) 23 | 24 | 25 | ######################################################################## 26 | # Test classes 27 | ######################################################################## 28 | 29 | class BaseTest(unittest.TestCase): 30 | """ 31 | Base for test classes 32 | """ 33 | 34 | def assertOutput(self, options, output_file, input_file=None): 35 | if not input_file: 36 | input_file = self.input_file 37 | self.assertTrue( 38 | try_script( 39 | self.function, 40 | options, 41 | input_file, 42 | expected_output_file = output_file 43 | ) 44 | ) 45 | 46 | def assertExitStatus(self, options, exit_status=hxl.scripts.EXIT_OK, input_file=None): 47 | if not input_file: 48 | input_file = self.input_file 49 | self.assertTrue( 50 | try_script( 51 | self.function, 52 | options, 53 | input_file, 54 | expected_exit_status = exit_status 55 | ) 56 | ) 57 | 58 | 59 | class TestAdd(BaseTest): 60 | """ 61 | Test the hxladd command-line tool. 
62 | """ 63 | 64 | def setUp(self): 65 | self.function = hxl.scripts.hxladd_main 66 | self.input_file = 'input-simple.csv' 67 | 68 | def test_default(self): 69 | self.assertOutput(['-s', 'date+reported=2015-03-31'], 'add-output-default.csv') 70 | self.assertOutput(['--spec', 'date+reported=2015-03-31'], 'add-output-default.csv') 71 | 72 | def test_headers(self): 73 | self.assertOutput(['-s', 'Report Date#date+reported=2015-03-31'], 'add-output-headers.csv') 74 | self.assertOutput(['--spec', 'Report Date#date+reported=2015-03-31'], 'add-output-headers.csv') 75 | 76 | def test_before(self): 77 | self.assertOutput(['-b', '-s', 'date+reported=2015-03-31'], 'add-output-before.csv') 78 | self.assertOutput(['--before', '--spec', 'date+reported=2015-03-31'], 'add-output-before.csv') 79 | 80 | 81 | class TestAppend(BaseTest): 82 | """ Test the hxlappend command-line tool. """ 83 | 84 | def setUp(self): 85 | self.function = hxl.scripts.hxlappend_main 86 | self.input_file = 'input-simple.csv' 87 | 88 | def test_append(self): 89 | self.assertOutput(['-a', resolve_file('input-simple.csv')], 'append-dataset.csv') 90 | self.assertOutput(['--append', resolve_file('input-simple.csv')], 'append-dataset.csv') 91 | 92 | 93 | class TestClean(BaseTest): 94 | """ 95 | Test the hxlclean command-line tool. 
96 | """ 97 | 98 | def setUp(self): 99 | self.function = hxl.scripts.hxlclean_main 100 | self.input_file = 'input-simple.csv' 101 | 102 | def test_noheaders(self): 103 | self.assertOutput(['--remove-headers'], 'clean-output-noheaders.csv') 104 | 105 | def test_headers(self): 106 | self.assertOutput([], 'clean-output-headers.csv') 107 | 108 | def test_whitespace(self): 109 | self.assertOutput(['-w', 'subsector'], 'clean-output-whitespace-tags.csv', 'input-whitespace.csv') 110 | 111 | def test_case(self): 112 | self.assertOutput(['-u', 'sector,subsector'], 'clean-output-upper.csv') 113 | self.assertOutput(['-l', 'sector,subsector'], 'clean-output-lower.csv') 114 | 115 | # TODO: test dates and numbers 116 | 117 | 118 | class TestCount(BaseTest): 119 | """ 120 | Test the hxlcount command-line tool. 121 | """ 122 | 123 | def setUp(self): 124 | self.function = hxl.scripts.hxlcount_main 125 | self.input_file = 'input-simple.csv' 126 | 127 | def test_simple(self): 128 | self.assertOutput(['-t', 'org,adm1'], 'count-output-simple.csv') 129 | self.assertOutput(['--tags', 'org,adm1'], 'count-output-simple.csv') 130 | 131 | def test_aggregated(self): 132 | self.assertOutput(['-t', 'org,adm1', '-a', 'sum(targeted) as Total targeted#targeted+total'], 'count-output-aggregated.csv') 133 | 134 | def test_count_colspec(self): 135 | self.assertOutput(['-t', 'org,adm1', '-a', 'count() as Activities#output+activities'], 'count-output-colspec.csv') 136 | 137 | 138 | class TestCut(BaseTest): 139 | """ 140 | Test the hxlcut command-line tool. 
141 | """ 142 | 143 | def setUp(self): 144 | self.function = hxl.scripts.hxlcut_main 145 | self.input_file = 'input-simple.csv' 146 | 147 | def test_includes(self): 148 | self.assertOutput(['-i', 'sector,org,adm1'], 'cut-output-includes.csv') 149 | self.assertOutput(['--include', 'sector,org,adm1'], 'cut-output-includes.csv') 150 | 151 | def test_excludes(self): 152 | self.assertOutput(['-x', 'population+sex,targeted'], 'cut-output-excludes.csv') 153 | self.assertOutput(['--exclude', 'population+sex,targeted'], 'cut-output-excludes.csv') 154 | 155 | 156 | class TestMerge(BaseTest): 157 | """ 158 | Test the hxlmerge command-line tool. 159 | """ 160 | 161 | def setUp(self): 162 | self.function = hxl.scripts.hxlmerge_main 163 | self.input_file = 'input-simple.csv' 164 | 165 | def test_merge(self): 166 | self.assertOutput(['-k', 'sector', '-t', 'status', '-m', resolve_file('input-merge.csv')], 'merge-output-basic.csv') 167 | self.assertOutput(['--keys', 'sector', '--tags', 'status', '-m', resolve_file('input-merge.csv')], 'merge-output-basic.csv') 168 | 169 | def test_replace(self): 170 | self.input_file = 'input-status.csv' 171 | self.assertOutput(['-r', '-k', 'sector', '-t', 'status', '-m', resolve_file('input-merge.csv')], 'merge-output-replace.csv') 172 | self.assertOutput(['--replace', '-k', 'sector', '-t', 'status', '-m', resolve_file('input-merge.csv')], 'merge-output-replace.csv') 173 | 174 | def test_overwrite (self): 175 | self.input_file = 'input-status.csv' 176 | self.assertOutput(['-O', '-r', '-k', 'sector', '-t', 'status', '-m', resolve_file('input-merge.csv')], 'merge-output-overwrite.csv') 177 | self.assertOutput(['--overwrite', '--replace', '-k', 'sector', '-t', 'status', '-m', resolve_file('input-merge.csv')], 'merge-output-overwrite.csv') 178 | 179 | class TestRename(BaseTest): 180 | """ 181 | Test the hxlrename command-line tool. 
182 | """ 183 | 184 | def setUp(self): 185 | self.function = hxl.scripts.hxlrename_main 186 | self.input_file = 'input-simple.csv' 187 | 188 | def test_single(self): 189 | self.assertOutput(['-r', 'targeted:affected'], 'rename-output-single.csv') 190 | self.assertOutput(['--rename', 'targeted:affected'], 'rename-output-single.csv') 191 | 192 | def test_header(self): 193 | self.assertOutput(['-r', 'targeted:Affected#affected'], 'rename-output-header.csv') 194 | 195 | def test_multiple(self): 196 | self.assertOutput(['-r', 'targeted:affected', '-r', 'org:funding'], 'rename-output-multiple.csv') 197 | 198 | 199 | class TestSelect(BaseTest): 200 | """ 201 | Test the hxlselect command-line tool. 202 | """ 203 | 204 | def setUp(self): 205 | self.function = hxl.scripts.hxlselect_main 206 | self.input_file = 'input-simple.csv' 207 | 208 | def test_eq(self): 209 | self.assertOutput(['-q', 'sector=WASH'], 'select-output-eq.csv') 210 | self.assertOutput(['--query', 'sector=WASH'], 'select-output-eq.csv') 211 | 212 | def test_ne(self): 213 | self.assertOutput(['-q', 'sector!=WASH'], 'select-output-ne.csv') 214 | 215 | def test_lt(self): 216 | self.assertOutput(['-q', 'targeted<200'], 'select-output-lt.csv') 217 | 218 | def test_le(self): 219 | self.assertOutput(['-q', 'targeted<=100'], 'select-output-le.csv') 220 | 221 | def test_gt(self): 222 | self.assertOutput(['-q', 'targeted>100'], 'select-output-gt.csv') 223 | 224 | def test_ge(self): 225 | self.assertOutput(['-q', 'targeted>=100'], 'select-output-ge.csv') 226 | 227 | def test_re(self): 228 | self.assertOutput(['-q', 'sector~^W..H'], 'select-output-re.csv') 229 | 230 | def test_nre(self): 231 | self.assertOutput(['-q', 'sector!~^W..H'], 'select-output-nre.csv') 232 | 233 | def test_reverse(self): 234 | self.assertOutput(['-r', '-q', 'sector=WASH'], 'select-output-reverse.csv') 235 | self.assertOutput(['--reverse', '--query', 'sector=WASH'], 'select-output-reverse.csv') 236 | 237 | def test_multiple(self): 238 | 
self.assertOutput(['-q', 'sector=WASH', '-q', 'sector=Salud'], 'select-output-multiple.csv') 239 | 240 | 241 | class TestSort(BaseTest): 242 | """ 243 | Test the hxlsort command-line tool,. 244 | """ 245 | 246 | def setUp(self): 247 | self.function = hxl.scripts.hxlsort_main 248 | self.input_file = 'input-simple.csv' 249 | 250 | def test_default(self): 251 | self.assertOutput([], 'sort-output-default.csv') 252 | 253 | def test_tags(self): 254 | self.assertOutput(['-t', 'country'], 'sort-output-tags.csv') 255 | self.assertOutput(['--tags', 'country'], 'sort-output-tags.csv') 256 | 257 | def test_numeric(self): 258 | self.assertOutput(['-t', 'targeted'], 'sort-output-numeric.csv') 259 | 260 | def test_date(self): 261 | self.input_file = 'input-date.csv' 262 | self.assertOutput(['-t', 'date+reported'], 'sort-output-date.csv') 263 | 264 | def test_reverse(self): 265 | self.assertOutput(['-r'], 'sort-output-reverse.csv') 266 | self.assertOutput(['--reverse'], 'sort-output-reverse.csv') 267 | 268 | 269 | class TestTag(BaseTest): 270 | """ 271 | Test the hxltag command-line tool. 
272 | """ 273 | 274 | def setUp(self): 275 | self.function = hxl.scripts.hxltag_main 276 | self.input_file = 'input-untagged.csv' 277 | 278 | def test_full(self): 279 | """Use full header text for tagging.""" 280 | self.assertOutput([ 281 | '-m', 'Organisation#org', 282 | '-m', 'Cluster#sector', 283 | '-m', 'Country#country', 284 | '-m', 'Subdivision#adm1' 285 | ], 'tag-output-full.csv') 286 | 287 | 288 | def test_substrings(self): 289 | """Use header substrings for tagging.""" 290 | self.assertOutput([ 291 | '-m', 'org#org', 292 | '-m', 'cluster#sector', 293 | '-m', 'ntry#country', 294 | '-m', 'div#adm1' 295 | ], 'tag-output-full.csv') 296 | self.assertOutput([ 297 | '-a', # force match_all 298 | '-m', 'org#org', # should fail 299 | '-m', 'cluster#sector' #should succeed 300 | ], 'tag-output-notsubstrings.csv') 301 | 302 | def test_partial(self): 303 | """Try tagging only one row.""" 304 | self.assertOutput([ 305 | '--map', 'cluster#sector' 306 | ], 'tag-output-partial.csv') 307 | 308 | def test_ambiguous(self): 309 | """Use an ambiguous header for the second one.""" 310 | self.assertOutput([ 311 | '-m', 'organisation#org', 312 | '-m', 'is#adm1' 313 | ], 'tag-output-ambiguous.csv') 314 | 315 | def test_default_tag(self): 316 | """Supply a default tag.""" 317 | self.assertOutput([ 318 | '-m', 'organisation#org', 319 | '-d', '#meta' 320 | ], 'tag-output-default.csv') 321 | 322 | 323 | class TestValidate(BaseTest): 324 | """ 325 | Test the hxltag command-line tool. 
326 | """ 327 | 328 | def setUp(self): 329 | self.function = hxl.scripts.hxlvalidate_main 330 | self.input_file = 'input-simple.csv' 331 | 332 | def test_default_valid_status(self): 333 | self.assertExitStatus([]) 334 | 335 | def test_bad_hxl_status(self): 336 | self.input_file = 'input-untagged.csv' 337 | def try_script(): 338 | self.assertExitStatus([], exit_status = hxl.scripts.EXIT_ERROR), 339 | # from the command line, this will get intercepted 340 | self.assertRaises(hxl.input.HXLTagsNotFoundException, try_script) 341 | 342 | def test_default_valid_status(self): 343 | self.assertExitStatus([ 344 | '--schema', resolve_file('validation-schema-valid.csv') 345 | ], hxl.scripts.EXIT_OK) 346 | self.assertExitStatus([ 347 | '-s', resolve_file('validation-schema-valid.csv') 348 | ], hxl.scripts.EXIT_OK) 349 | 350 | def test_default_invalid_status(self): 351 | self.assertExitStatus([ 352 | '--schema', resolve_file('validation-schema-invalid.csv') 353 | ], hxl.scripts.EXIT_ERROR) 354 | self.assertExitStatus([ 355 | '-s', resolve_file('validation-schema-invalid.csv') 356 | ], hxl.scripts.EXIT_ERROR) 357 | 358 | 359 | ######################################################################## 360 | # Support functions 361 | ######################################################################## 362 | 363 | 364 | def resolve_file(name): 365 | """ 366 | Resolve a file name in the test directory. 367 | """ 368 | return os.path.join(root_dir, 'tests', 'files', 'test_scripts', name) 369 | 370 | def try_script(script_function, args, input_file, expected_output_file=None, expected_exit_status=hxl.scripts.EXIT_OK): 371 | """ 372 | Test run a script in its own subprocess. 
373 | @param args A list of arguments, including the script name first 374 | @param input_file The name of the input HXL file in ./files/test_scripts/ 375 | @param expected_output_file The name of the expected output HXL file in ./files/test_scripts 376 | @return True if the actual output matches the expected output 377 | """ 378 | 379 | with open(resolve_file(input_file), 'rb') as input: 380 | if expected_output_file is None: 381 | output = sys.stdout 382 | output = tempfile.NamedTemporaryFile(mode='w', newline='', delete=False) 383 | try: 384 | status = script_function(args, stdin=input, stdout=output) 385 | if status == expected_exit_status: 386 | result = True 387 | if expected_output_file: 388 | output.close() 389 | result = diff(output.name, resolve_file(expected_output_file)) 390 | else: 391 | print("Script exit status: {}".format(status)) 392 | result = False 393 | finally: 394 | # Not using with, because Windows won't allow file to be opened twice 395 | os.remove(output.name) 396 | return result 397 | 398 | 399 | def diff(file1, file2): 400 | """ 401 | Compare two files, ignoring line end differences 402 | 403 | If there are differences, print them to stderr in unified diff format. 404 | 405 | @param file1 The full pathname of the first file to compare 406 | @param file2 The full pathname of the second file to compare 407 | @return True if the files are the same, o 408 | """ 409 | with open(file1, 'r') as input1: 410 | with open(file2, 'r') as input2: 411 | diffs = difflib.unified_diff( 412 | input1.read().splitlines(), 413 | input2.read().splitlines() 414 | ) 415 | no_diffs = True 416 | for diff in diffs: 417 | no_diffs = False 418 | print(diff, file=sys.stderr) 419 | return no_diffs 420 | 421 | # end 422 | --------------------------------------------------------------------------------