├── .gitignore
├── LICENSE.md
├── README.md
├── earl.ttl
├── pycsvw
├── __init__.py
├── built_in_datatypes.py
├── csv_parser.py
├── json_generator.py
├── logger.py
├── main.py
├── metadata.py
├── metadata_extractor.py
├── parser_exceptions.py
├── test.py
├── testdata
│ ├── csvw-template
│ │ ├── LICENSE
│ │ ├── README.md
│ │ ├── example.csv
│ │ └── example.csv-metadata.json
│ ├── test124-user-metadata.json
│ ├── test125-metadata.json
│ ├── test125.csv
│ ├── test234-metadata.json
│ ├── test234.csv
│ ├── tree-ops.csv
│ ├── validate-result-missing-column.txt
│ ├── validate-result-primary-key-fail.txt
│ └── validate-result-required-fail.txt
└── validator.py
├── setup.py
└── test
├── __init__.py
├── csvw_implementation_report.py
├── csvw_json_test_cases.py
├── csvw_validation_single_test.py
├── csvw_validation_test_cases.py
├── doap.ttl
├── errors_10-12-15.txt
├── metadata_test.py
└── validator_test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.DS_Store
3 | */_build/*
4 | *.py~
5 | *.~lock.*#
6 | .idea/
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Distribution / packaging
16 | .Python
17 | env/
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *,cover
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 |
60 | # Sphinx documentation
61 | docs/_build/
62 |
63 | # PyBuilder
64 | target/
65 |
66 | #visual studio project
67 | *.pyproj
68 | *.suo
69 | *.sln
70 | *.pyproj.user
71 | .vs/
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Sebastian Neumaier
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pycsvw
2 |
3 | Python implementation of the W3C CSV on the Web specification, cf. http://w3c.github.io/csvw/
4 |
5 |
6 | ## Authors
7 |
8 | - Sebastian Neumaier
9 | - Jürgen Umbrich
10 | - Mao Li
11 |
--------------------------------------------------------------------------------
/pycsvw/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'sebastian'
2 |
3 | from main import CSVW
--------------------------------------------------------------------------------
/pycsvw/built_in_datatypes.py:
--------------------------------------------------------------------------------
1 | __author__ = 'sebastian'
2 |
3 | from rdflib.namespace import XSD
4 | from rdflib.namespace import RDF
5 | from rdflib import Namespace
6 |
7 | CSVW = Namespace("http://www.w3.org/ns/csvw#")
8 |
9 |
10 | # Valid datatypes
DATATYPES = {
    # Maps every datatype name accepted in CSVW metadata to the URI of the
    # XSD / RDF / CSVW type that backs it.
    'anyAtomicType': XSD.anyAtomicType,
    'anyURI': XSD.anyURI,
    # was misspelt as XSD.basee65Binary, which does not name a real XSD type
    'base64Binary': XSD.base64Binary,
    'boolean': XSD.boolean,
    'byte': XSD.byte,
    'date': XSD.date,
    'dateTime': XSD.dateTime,
    'dayTimeDuration': XSD.dayTimeDuration,
    'dateTimeStamp': XSD.dateTimeStamp,
    'decimal': XSD.decimal,
    'double': XSD.double,
    'duration': XSD.duration,
    'float': XSD.float,
    'ENTITY': XSD.ENTITY,
    'gDay': XSD.gDay,
    'gMonth': XSD.gMonth,
    'gMonthDay': XSD.gMonthDay,
    'gYear': XSD.gYear,
    'gYearMonth': XSD.gYearMonth,
    'hexBinary': XSD.hexBinary,
    'int': XSD.int,
    'integer': XSD.integer,
    'language': XSD.language,
    'long': XSD.long,
    'Name': XSD.Name,
    'NCName': XSD.NCName,
    'negativeInteger': XSD.negativeInteger,
    'NMTOKEN': XSD.NMTOKEN,
    'nonNegativeInteger': XSD.nonNegativeInteger,
    'nonPositiveInteger': XSD.nonPositiveInteger,
    'normalizedString': XSD.normalizedString,
    'NOTATION': XSD.NOTATION,
    'positiveInteger': XSD.positiveInteger,
    # was misspelt as XSD.Qname; XSD local names are case-sensitive
    'QName': XSD.QName,
    'short': XSD.short,
    'string': XSD.string,
    'time': XSD.time,
    'token': XSD.token,
    'unsignedByte': XSD.unsignedByte,
    'unsignedInt': XSD.unsignedInt,
    'unsignedLong': XSD.unsignedLong,
    'unsignedShort': XSD.unsignedShort,
    'yearMonthDuration': XSD.yearMonthDuration,

    # Aliases: shorter names mapped onto one of the types above or onto an
    # RDF / CSVW type.
    'any': XSD.anyAtomicType,
    'binary': XSD.base64Binary,
    'datetime': XSD.dateTime,
    'html': RDF.HTML,
    'json': CSVW.JSON,
    'number': XSD.double,
    'xml': RDF.XMLLiteral,
}
64 |
65 |
def is_built_in_datatype(value):
    """Return True when *value* is one of the keys of ``DATATYPES``."""
    known_names = DATATYPES
    return value in known_names
68 |
--------------------------------------------------------------------------------
/pycsvw/csv_parser.py:
--------------------------------------------------------------------------------
1 | from parser_exceptions import ParserException
2 |
3 | __author__ = 'neumaier'
4 |
5 | '''
6 | Settings:
7 | comment prefix
8 | A character that, when it appears at the beginning of a row, indicates that the row is a comment that should be associated as a rdfs:comment annotation to the table. This is set by the commentPrefix property of a dialect description. The default is null, which means no rows are treated as comments. A value other than null may mean that the source numbers of rows are different from their numbers.
9 | delimiter
10 | The separator between cells, set by the delimiter property of a dialect description. The default is ,.
11 | encoding
12 | The character encoding for the file, one of the encodings listed in [encoding], set by the encoding property of a dialect description. The default is utf-8.
13 | escape character
14 | The character that is used to escape the quote character within escaped cells, or null, set by the doubleQuote property of a dialect description. The default is " (such that "" is used to escape " within an escaped cell).
15 | header row count
16 | The number of header rows (following the skipped rows) in the file, set by the header or headerRowCount property of a dialect description. The default is 1. A value other than 0 will mean that the source numbers of rows will be different from their numbers.
17 | line terminators
18 | The characters that can be used at the end of a row, set by the lineTerminators property of a dialect description. The default is [CRLF, LF].
19 | quote character
20 | The character that is used around escaped cells, or null, set by the quoteChar property of a dialect description. The default is ".
21 | skip blank rows
22 | Indicates whether to ignore wholly empty rows (ie rows in which all the cells are empty), set by the skipBlankRows property of a dialect description. The default is false. A value other than false may mean that the source numbers of rows are different from their numbers.
23 | skip columns
24 | The number of columns to skip at the beginning of each row, set by the skipColumns property of a dialect description. The default is 0. A value other than 0 will mean that the source numbers of columns will be different from their numbers.
25 | skip rows
26 | The number of rows to skip at the beginning of the file, before a header row or tabular data, set by the skipRows property of a dialect description. The default is 0. A value greater than 0 will mean that the source numbers of rows will be different from their numbers.
27 | trim
28 | Indicates whether to trim whitespace around cells; may be true, false, start, or end, set by the skipInitialSpace or trim property of a dialect description. The default is false.
29 | '''
# Default dialect settings; used by parse() when the caller passes none.
SETTINGS = {
    "comment prefix": None,             # None: no row is treated as a comment
    "delimiter": ",",
    "encoding": "utf-8",
    "escape character": "\"",           # same as the quote character by default
    "header row count": 1,
    "line terminators": ["CRLF", "LF"],
    "quote character": "\"",
    "skip blank rows": False,
    "skip columns": 0,
    "skip rows": 0,
    "trim": False,
}
43 |
44 |
def parse_row(row, settings):
    """Split one row string into a list of cell values.

    Follows the "parse a row" algorithm of
    http://www.w3.org/TR/2015/WD-tabular-data-model-20150416/#dfn-parse-a-row

    :param row: the row as a string, without its line terminator
    :param settings: dialect settings; the keys ``'escape character'``,
        ``'quote character'`` and ``'delimiter'`` are read
    :return: list of cell strings, each stripped of surrounding whitespace
    :raises ParserException: if a quote opens in the middle of a cell or a
        closing quote is not followed by the delimiter
    """
    escape = settings['escape character']
    quote = settings['quote character']
    delimiter = settings['delimiter']
    length = len(row)

    cell_values = []
    current = []  # characters of the cell being read
    quoted = False

    # An explicit index is needed: an escape sequence consumes two characters
    # and the second one must not be looked at again.
    i = 0
    while i < length:
        char = row[i]
        has_next = i + 1 < length

        # Escaped quote character. When the escape character IS the quote
        # character (the default, "" inside a quoted cell) this only applies
        # inside quotes; otherwise an empty quoted cell ("") would be misread
        # as a literal quote.
        if (char == escape and has_next and row[i + 1] == quote
                and (quoted or escape != quote)):
            current.append(quote)
            i += 2
            continue

        # Dedicated escape character: take the next character literally.
        if char == escape and escape != quote and has_next:
            current.append(row[i + 1])
            i += 2
            continue

        if char == quote:
            if not quoted:
                quoted = True
                if current:
                    # quote opening in the middle of a cell
                    raise ParserException('Quotation error: ' + row)
            else:
                quoted = False
                if has_next and row[i + 1] != delimiter:
                    # closing quote must end the cell
                    raise ParserException('Quotation error: ' + row)
        elif char == delimiter:
            if quoted:
                current.append(delimiter)
            else:
                cell_values.append(''.join(current).strip())
                current = []
        else:
            current.append(char)
        i += 1

    cell_values.append(''.join(current).strip())
    return cell_values
77 |
78 |
def _comment_body(row, prefix):
    """Return *row* without its leading comment *prefix*, or None if *row* is not a comment."""
    if prefix and row.startswith(prefix):
        # drop only the leading prefix; str.strip(prefix) would also eat
        # prefix characters at the end of the comment
        return row[len(prefix):].strip()
    return None


def parse(handle, url, settings=SETTINGS):
    """Parse tabular data into a table and its embedded metadata.

    Follows http://w3c.github.io/csvw/syntax/index.html#parsing

    :param handle: object with a ``read()`` method returning the whole file
    :param url: location of the file; stored on the table and in the metadata
        (``'unspecified'`` is stored in the metadata when it is empty)
    :param settings: dialect settings, see ``SETTINGS``
    :return: tuple ``(T, M)`` of the ``Table`` and the embedded metadata dict
    :raises ParserException: propagated from ``parse_row``
    """
    rows = handle.read().splitlines()
    prefix = settings['comment prefix']
    skip_columns = settings['skip columns']

    T = Table(url)
    M = {
        "@context": "http://www.w3.org/ns/csvw",
        "rdfs:comment": [],
        "tableSchema": {
            "columns": []
        }
    }
    M['url'] = url if url else 'unspecified'
    comments = M['rdfs:comment']
    columns = M['tableSchema']['columns']

    # Row ranges of the three sections. The header range is relative to the
    # end of the skipped rows, and both are clamped to the file length so no
    # row is ever read by two sections.
    skip_end = min(settings['skip rows'], len(rows))
    header_end = min(skip_end + settings['header row count'], len(rows))

    # Skipped rows: comments and any non-blank text become rdfs:comment.
    for row in rows[:skip_end]:
        comment = _comment_body(row, prefix)
        if comment is not None:
            comments.append(comment)
        elif row.strip():
            comments.append(row)

    # Header rows: each non-empty cell is a title of its column.
    for row in rows[skip_end:header_end]:
        comment = _comment_body(row, prefix)
        if comment is not None:
            comments.append(comment)
            continue
        # Remove the first skip columns number of values from the list of cell values
        cells = parse_row(row, settings)[skip_columns:]
        # create column descriptions on demand, also when a later header row
        # is longer than the first one
        while len(columns) < len(cells):
            columns.append({'titles': []})
        for column, cell in zip(columns, cells):
            title = cell.strip()
            if title:
                column['titles'].append(title)

    # Data rows.
    source_row_number = header_end + 1  # 1-based position in the file
    row_number = 1                      # 1-based position among emitted rows
    for row in rows[header_end:]:
        comment = _comment_body(row, prefix)
        if comment is not None:
            comments.append(comment)
        else:
            cells = parse_row(row, settings)
            blank = all(v == '' for v in cells)
            if not (settings['skip blank rows'] and blank):
                R = Row(table=T, number=row_number, source_number=source_row_number)
                T.rows.append(R)
                # Remove the first skip columns number of values from the list of cell values
                cells = cells[skip_columns:]
                source_column_number = 1 + skip_columns
                for index, value in enumerate(cells):
                    # Identify the column C at this position, creating it if missing
                    if len(T.columns) <= index:
                        C = Column(table=T, number=index + 1, source_number=source_column_number)
                        T.columns.append(C)
                    else:
                        C = T.columns[index]
                    D = Cell(value=value, table=T, column=C, row=R)
                    C.cells.append(D)
                    R.cells.append(D)
                    source_column_number += 1
                # only rows added to the table consume a row number
                row_number += 1
        source_row_number += 1

    # If M.rdfs:comment is an empty array, remove the rdfs:comment property from M
    if not comments:
        M.pop('rdfs:comment')

    # Return the table T and the embedded metadata M
    return T, M
177 |
178 |
class Cell:
    """One cell of a table, linked to its table, column and row."""

    def __init__(self, value, table, column, row):
        # position of the cell
        self.table, self.column, self.row = table, column, row
        # both start out as the raw value handed in by the parser
        self.string_value = self.value = value
        self.errors = []
        self.text_direction = 'ltr'
        # URL annotations, unset until something assigns them
        self.about_url = self.property_url = self.value_url = None

    def __repr__(self):
        return 'Cell (%s %s)' % (str(self.row), str(self.column))
194 |
195 |
class Column:
    """One column of a table; collects the cells that belong to it."""

    def __init__(self, table, number, source_number):
        self.table = table
        # ``number`` counts columns kept by the parser, ``source_number``
        # is the position handed in by the caller
        self.number, self.source_number = number, source_number
        self.name = None
        self.titles = []
        self.cells = []
        self.datatype = basestring
        self.virtual = False
        self.suppress_output = False

    def __repr__(self):
        return 'Column %s' % (str(self.number),)
210 |
211 |
class Row:
    """One row of a table; collects the cells that belong to it."""

    def __init__(self, table, number, source_number):
        self.table = table
        self.number, self.source_number = number, source_number
        self.cells = []
        self.primary_key = []
        self.referenced_rows = []

    def __repr__(self):
        return 'Row %s' % (str(self.number),)
223 |
class Table:
    """A parsed table: its columns, rows and table-level annotations."""

    def __init__(self, url):
        self.url = url
        self.id = None
        # filled by the parser
        self.rows = []
        self.columns = []
        # table-level annotations with their initial values
        self.table_direction = 'auto'
        self.suppress_output = False
        self.notes = False
        self.foreign_keys = []
        self.transformations = []
235 |
--------------------------------------------------------------------------------
/pycsvw/json_generator.py:
--------------------------------------------------------------------------------
1 | from uritemplate import expand
2 |
3 |
def generate_object(row, metadata):
    """Unfinished: creates an empty dict for *row* and returns nothing (None)."""
    # NOTE(review): the dict is neither filled nor returned yet.
    obj = dict()
6 |
7 |
8 |
9 |
def minimal_mode(table, metadata):
    """Unfinished: calls generate_object for each row unless output is suppressed.

    Nothing is collected or returned yet; the function always returns None.
    """
    A = []
    if metadata.get('suppressOutput', False):
        return
    for current_row in table.rows:
        obj = generate_object(current_row, metadata)
15 |
16 |
--------------------------------------------------------------------------------
/pycsvw/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | logging.basicConfig(level=logging.WARNING,
4 | format='%(asctime)s %(levelname)s %(message)s')
5 | logger = logging.getLogger(__name__)
6 |
7 |
8 | def _format_message(line, message, args):
9 | if line:
10 | message = str(line) + ': ' + message
11 | for arg in args:
12 | try:
13 | message += str(arg) + ','
14 | except Exception:
15 | pass
16 | return message
17 |
18 |
def warning(line, message, *args):
    """Log *message* at WARNING level, formatted by ``_format_message``."""
    text = _format_message(line, message, args)
    logger.warning(text)
21 |
22 |
def error(line, message, *args):
    """Log *message* at ERROR level, formatted by ``_format_message``."""
    text = _format_message(line, message, args)
    logger.error(text)
25 |
26 |
def debug(line, message, *args):
    """Log *message* at DEBUG level, formatted by ``_format_message``."""
    text = _format_message(line, message, args)
    logger.debug(text)
29 |
30 |
def info(line, message, *args):
    """Log *message* at INFO level, formatted by ``_format_message``.

    :param line: line reference put in front of the message; omitted when falsy
    :param message: the message text
    :param args: extra values appended to the message
    """
    # Pass the tuple itself: _format_message takes exactly three positional
    # arguments, so unpacking it (``*args``) raised TypeError for any
    # number of extras other than one.
    logger.info(_format_message(line, message, args))
33 |
--------------------------------------------------------------------------------
/pycsvw/main.py:
--------------------------------------------------------------------------------
1 | from StringIO import StringIO
2 | import urllib2
3 | import logging
4 | import csv_parser
5 | import metadata
6 | import json_generator
7 | import metadata_extractor
8 |
9 |
10 | __author__ = 'sebastian'
11 |
12 | logging.basicConfig()
13 | logger = logging.getLogger(__name__)
14 |
15 |
class CSVW:
    """A tabular data file together with its merged metadata.

    Exactly one data source is expected: ``url`` or ``path`` (``handle`` is a
    testing hook and takes precedence over both). Metadata may come from
    ``metadata_url`` or ``metadata_path`` (``metadata_handle`` is a testing
    hook).

    :raises ValueError: if no data source is given, if both ``url`` and
        ``path`` are given, or if both ``metadata_url`` and ``metadata_path``
        are given
    """

    def __init__(self, url=None, path=None, handle=None, metadata_url=None, metadata_path=None, metadata_handle=None, date_parsing=False):
        # http://www.w3.org/TR/2015/WD-tabular-data-model-20150416/#processing-tables
        # NOTE(review): date_parsing is accepted but not used in this class.
        owns_handle = False  # True when the data handle was opened here
        if handle:
            logger.warning('"handle" is used only for testing purposes')
        elif path and url:
            # must be tested before the single-argument branches, otherwise
            # it can never be reached
            raise ValueError("only one argument of url and path allowed")
        elif url:
            url_resp = urllib2.urlopen(url)
            try:
                handle = StringIO(url_resp.read())
            finally:
                url_resp.close()
            owns_handle = True
        elif path:
            handle = open(path, 'rb')
            owns_handle = True
        else:
            raise ValueError("url or path argument required")

        try:
            if metadata_path and metadata_url:
                raise ValueError("only one argument of metadata_url and metadata_path allowed")
            elif metadata_handle:
                logger.warning('"metadata_handle" is used only for testing purposes')
            elif metadata_url:
                meta_resp = urllib2.urlopen(metadata_url)
                try:
                    metadata_handle = StringIO(meta_resp.read())
                finally:
                    meta_resp.close()
            elif metadata_path:
                # NOTE(review): left open on purpose; whether the metadata
                # extraction has finished reading it when it returns cannot
                # be seen from here.
                metadata_handle = open(metadata_path, 'rb')

            # Retrieve the tabular data file.
            self.table, embedded_metadata = csv_parser.parse(handle, url)
        finally:
            # the parser reads the whole file up front, so a handle opened
            # here is no longer needed; caller-supplied handles are not touched
            if owns_handle:
                handle.close()

        # TODO create settings using arguments or provided metadata
        sources = metadata_extractor.metadata_extraction(url, metadata_handle, embedded_metadata=embedded_metadata)
        self.metadata = metadata.merge(sources)

    def to_rdf(self):
        """Not implemented yet."""
        pass

    def to_json(self):
        """Run the minimal-mode JSON generator on the first table and return its result."""
        # TODO group of tables?
        return json_generator.minimal_mode(self.table, self.metadata.json()['tables'][0])
58 |
--------------------------------------------------------------------------------
/pycsvw/metadata.py:
--------------------------------------------------------------------------------
1 | import re
2 | import built_in_datatypes
3 | from parser_exceptions import ValidationException
4 | import logger
5 | import urlparse
6 | import language_tags
7 |
8 |
def is_absolute(url):
    """Return True when *url* has a network location part (e.g. a host name)."""
    parts = urlparse.urlparse(url)
    return True if parts.netloc else False
11 |
12 |
def is_common_property(prop):
    """Match *prop* against ``letters:letters``; return the match object or None."""
    pattern = re.compile('^[a-zA-Z]*:[a-zA-Z]*$')
    return pattern.match(prop)
15 |
class Enumeration(object):
    """Set of named constants; each name becomes an attribute holding ``hash(name)``."""

    def __init__(self, *names):
        for member in names:
            setattr(self, member, hash(member))

    def __contains__(self, hash):
        # membership is decided on the stored values, not on the names
        members = vars(self).values()
        return hash in members
23 |
24 |
25 | Option = Enumeration('Required', 'NonEmpty')
26 | Commands = Enumeration('Remove', 'Error')
27 |
28 | #class Option:
29 | # Required, NonEmpty = range(2)
30 |
31 | #class Commands:
32 | # Remove = 'REMOVE'
33 | # Error = 'ERROR'
34 |
class MetaObject:
    """Root of the schema node classes; subclasses override ``evaluate``."""

    def evaluate(self, meta, params, default=None, line=None):
        """Default implementation: reject everything by returning False."""
        rejected = False
        return rejected
38 |
39 |
class Property(MetaObject):
    """Base class of property nodes; the evaluated value is kept in ``value``."""

    def __init__(self):
        self.value = None

    def normalize(self, params):
        """Do nothing; subclasses that need normalization override this."""
        return None

    def merge(self, obj):
        """Do nothing (TODO); subclasses that support merging override this."""
        # TODO
        return None

    def json(self):
        """Return the stored value unchanged."""
        return self.value
54 |
55 |
class Uri(Property):
    """URI-valued property; the value is accepted without checks (TODO)."""

    def evaluate(self, meta, params, default=None, line=None):
        """Return a new ``Uri`` whose value is *meta*."""
        # TODO
        logger.debug(line, 'URI property: ', meta)
        uri = Uri()
        uri.value = meta
        return uri
63 |
64 |
class ColumnReference(Property):
    """Property holding a column name or a list of column names."""

    def evaluate(self, meta, params, default=None, line=None):
        """Return a new ``ColumnReference`` for *meta*.

        The value stays unset (None) when *meta* is an empty list or neither
        a string nor a list; a warning is logged in both cases. A list with
        non-string entries is logged as a warning but still stored.
        """
        reference = ColumnReference()
        is_string = isinstance(meta, basestring)

        if not is_string and not isinstance(meta, list):
            logger.warning(line, 'the supplied value is not a string or array: ', meta)
            return reference
        if not is_string and not meta:
            logger.warning(line, 'the supplied value is an empty array')
            return reference

        # TODO must match the name on a column description object
        if is_string or all(isinstance(entry, basestring) for entry in meta):
            logger.debug(line, 'Column Reference property: ', meta)
        else:
            logger.warning(line, 'the values in the supplied array are not strings: ', meta)
        reference.value = meta
        return reference
94 |
95 |
class NaturalLanguage(Property):
    """Natural language property: a string, a list of strings, or a dict
    mapping language codes to such values."""

    def evaluate(self, meta, params, default=None, line=None):
        """Return a new ``NaturalLanguage`` holding *meta*.

        Returns False (after logging an error) when *meta* is a dict with a
        key that is not a valid language tag.
        """
        # TODO validate strings and arrays as well
        if isinstance(meta, dict):
            for k in meta:
                if not language_tags.tags.check(k):
                    logger.error(line, 'Natural language properties MUST be language codes as defined by [BCP47]: ', meta)
                    return False
        logger.debug(line, 'Natural language property: ', meta)
        result = NaturalLanguage()
        result.value = meta
        return result

    def normalize(self, params):
        """Turn a string or list value into ``{language: [values]}``.

        The language is ``params['default_language']`` when present, else
        ``'und'``. Other values are left as they are.
        """
        if isinstance(self.value, list):
            self.value = {params.get('default_language', 'und'): list(self.value)}
        else:
            self.value = self._normalize(self.value, params)

    def _normalize(self, value, params):
        """Wrap a plain string as ``{language: [value]}``; return anything else unchanged."""
        if isinstance(value, basestring):
            value = {params.get('default_language', 'und'): [value]}
        return value

    def merge(self, obj):
        """Merge the values of *obj* into this property, in place.

        Both values are expected to be dicts keyed by language code (the
        shape ``normalize`` produces). For every language the values already
        present come first, followed by those of *obj* that were not present
        yet. A value listed here under ``'und'`` that *obj* assigns to a real
        language is moved to that language; an emptied ``'und'`` entry is
        removed.
        """
        # A dict given directly in the metadata may map a language to a
        # single string; treat that as a one-element list on both sides so
        # strings are never iterated character by character.
        for lang in list(self.value):
            if isinstance(self.value[lang], basestring):
                self.value[lang] = [self.value[lang]]

        for lang in obj.value:
            incoming = obj.value[lang]
            if isinstance(incoming, basestring):
                incoming = [incoming]
            for v in incoming:
                undefined = self.value.get('und')
                if lang != 'und' and undefined and v in undefined:
                    # the other side knows the language of this value
                    undefined.remove(v)
                    if not undefined:
                        self.value.pop('und')
                # also creates the entry for languages unknown so far
                bucket = self.value.setdefault(lang, [])
                if v not in bucket:
                    bucket.append(v)
148 |
class NumberPattern(Property):
    """Property holding a number pattern string."""

    def evaluate(self, meta, params, default=None, line=None):
        """Return a new ``NumberPattern``; its value is *meta* only if *meta* is a string."""
        pattern = NumberPattern()
        if not isinstance(meta, basestring):
            # issue a warning
            logger.warning(line, 'value of number pattern property is not a string: ', meta)
            return pattern
        logger.debug(line, 'Number pattern property: ', meta)
        pattern.value = meta
        return pattern
160 |
161 |
class Link(Property):
    """Link property, e.g. ``@id`` or a URL reference to another resource."""

    def __init__(self, link_type):
        Property.__init__(self)
        self.link_type = link_type

    def evaluate(self, meta, params, default=None, line=None):
        """Return a new ``Link`` for *meta*.

        A non-string *meta* is logged as a warning and yields a link without
        a value. An ``@id`` starting with ``_:`` is logged as an error and
        yields ``Commands.Error``.
        """
        link = Link(self.link_type)
        if not isinstance(meta, basestring):
            # issue a warning
            logger.warning(line, 'value of link property is not a string: ', meta)
            return link
        # @id must not start with _:
        if self.link_type == '@id' and meta.startswith('_:'):
            err_msg = '@id must not start with _:'
            logger.error(line, err_msg)
            return Commands.Error
        logger.debug(line, 'Link property: ', self.link_type, meta)
        link.value = meta
        return link

    def normalize(self, params):
        """Resolve a relative value against ``params['base_url']`` when that key is present."""
        if not self.value:
            return
        if is_absolute(self.value):
            return
        if 'base_url' in params:
            # turn into absolute url using base url
            self.value = urlparse.urljoin(params['base_url'], self.value)
188 |
189 |
class Array(Property):
    """Property whose metadata value must be a list; *arg* validates the list."""

    def __init__(self, arg, warning_only=False):
        Property.__init__(self)
        self.arg = arg
        self.warning_only = warning_only

    def evaluate(self, meta, params, default=None, line=None):
        """Validate the list *meta* with ``self.arg``.

        :return: a new ``Array`` holding the result of ``self.arg``; the
            ``Commands`` member itself when ``self.arg`` returns one; an
            ``Array`` with an empty ``{}`` value when validation fails and
            ``warning_only`` is set; otherwise False
        """
        result = Array(self.arg)
        if isinstance(meta, list):
            # if the arg is a operator, it should take a list as argument.
            # ``line`` goes by keyword: positionally it would land in the
            # ``default`` slot (``warning_only`` for Or).
            result.value = self.arg.evaluate(meta, params, line=line)
            if result.value in Commands:
                return result.value
            if result.value:
                return result
        # error while parsing, or the meta obj is not a list
        if self.warning_only:
            result.value = {}
            return result
        return False

    def normalize(self, params):
        """Normalize every element."""
        for v in self.value:
            v.normalize(params)

    def merge(self, obj):
        """Merge element *i* of *obj* into element *i* of this array, where both exist."""
        if isinstance(obj.value, list):
            # TODO maybe wrong??
            for i, v in enumerate(obj.value):
                if len(self.value) > i:
                    self.value[i].merge(v)

    def json(self):
        """Return the list of the elements' JSON representations."""
        return [v.json() for v in self.value]
226 |
227 |
class Common(Property):
    """Common (``prefix:name``) property; the value is stored unchecked."""

    def __init__(self, prop):
        Property.__init__(self)
        self.prop = prop

    def evaluate(self, meta, params, default=None, line=None):
        """Return a new ``Common`` for the same property name, holding *meta*."""
        # TODO http://www.w3.org/TR/2015/WD-tabular-metadata-20150416/#h-values-of-common-properties
        logger.debug(line, 'CommonProperty: ', self.prop)
        common = Common(self.prop)
        common.value = meta
        return common

    def normalize(self, params):
        """Replace the value by its normalized form."""
        self.value = self._normalize(self.value, params)

    def _normalize(self, value, params):
        """Normalize *value* recursively and return the result.

        Lists are normalized element by element. A dict containing ``@value``
        is returned as is. In other dicts ``@id`` is resolved against
        ``params['base_url']`` (when present and the id is not absolute),
        ``@type`` is kept, and every other entry is normalized. A string
        becomes ``{'@value': string}`` plus ``@language`` when
        ``params['default_language']`` exists. Anything else is returned
        unchanged.
        """
        if isinstance(value, list):
            return [self._normalize(item, params) for item in value]

        if isinstance(value, dict):
            if '@value' in value:
                return value
            for key in value:
                if key == '@type':
                    continue
                if key == '@id':
                    if not is_absolute(value[key]) and 'base_url' in params:
                        value[key] = urlparse.urljoin(params['base_url'], value[key])
                    continue
                value[key] = self._normalize(value[key], params)
            return value

        if isinstance(value, basestring):
            wrapped = {'@value': value}
            if 'default_language' in params:
                wrapped['@language'] = params['default_language']
            return wrapped

        return value
265 |
266 |
class Base(Property):
    """The ``@base`` entry of a context object."""

    def evaluate(self, meta, params, default=None, line=None):
        """Return a new ``Base`` for ``meta['@base']`` and record it in *params*.

        Side effect: sets ``params['base_url']``.

        :return: the new ``Base``, or False when *meta* is not a dict or has
            no ``@base`` key
        """
        # Only a dict can carry the key. Without this check a number raised
        # TypeError on ``in``, and a string or list that happened to contain
        # '@base' raised on the lookup.
        if isinstance(meta, dict) and '@base' in meta:
            result = Base()
            result.value = meta['@base']
            params['base_url'] = result.value
            return result
        return False

    def json(self):
        """Return ``{'@base': value}``."""
        return {'@base': self.value}
279 |
280 |
class Language(Property):
    """The ``@language`` entry of a context object."""

    def evaluate(self, meta, params, default=None, line=None):
        """Return a new ``Language`` for ``meta['@language']`` and record it in *params*.

        Side effect: sets ``params['default_language']``.

        :return: the new ``Language``, or False when *meta* is not a dict or
            has no ``@language`` key
        """
        # Only a dict can carry the key. Without this check a number raised
        # TypeError on ``in``, and a string or list that happened to contain
        # '@language' raised on the lookup.
        if isinstance(meta, dict) and '@language' in meta:
            result = Language()
            result.value = meta['@language']
            params['default_language'] = result.value
            return result
        return False

    def json(self):
        """Return ``{'@language': value}``."""
        return {'@language': self.value}
293 |
294 |
class Atomic(Property):
    """Property with a single value, checked either by a schema node or
    against a fixed literal."""

    def __init__(self, arg):
        Property.__init__(self)
        self.arg = arg

    def evaluate(self, meta, params, default=None, line=None):
        """Check *meta* against ``self.arg``.

        When ``self.arg`` is a ``MetaObject`` its ``evaluate`` result becomes
        the value; ``Commands.Remove`` is passed through and a None result
        yields None. Otherwise *meta* must equal ``self.arg``.

        :return: a new ``Atomic``, ``Commands.Remove``, or None
        """
        result = Atomic(self.arg)
        if not isinstance(self.arg, MetaObject):
            # numbers, interpreted as integers or doubles
            # booleans, interpreted as booleans (true or false)
            # strings, interpreted as defined by the property
            # objects, interpreted as defined by the property
            # arrays, lists of numbers, booleans, strings, or objects
            # TODO
            if meta == self.arg:
                result.value = meta
                return result
            return None

        # a predefined type or an operator
        result.value = self.arg.evaluate(meta, params, default, line)
        if result.value == Commands.Remove:
            return Commands.Remove
        if result.value is not None:
            return result
        return None

    def json(self):
        """Return the value, or its own ``json()`` when it is a ``MetaObject``."""
        held = self.value
        return held.json() if isinstance(held, MetaObject) else held
327 |
328 |
class Object(Property):
    """Property whose value is a JSON object validated against a schema dict."""

    def __init__(self, dict_obj, inherited_obj=None, common_properties=False, warning_only=False):
        Property.__init__(self)
        self.dict_obj = dict_obj
        self.inherited_obj = inherited_obj
        self.common_properties = common_properties
        self.warning_only = warning_only

    def evaluate(self, meta, params, default=None, line=None):
        """Validate the dict *meta* against the schema.

        When ``inherited_obj`` is set, ``self.dict_obj`` is replaced by a copy
        extended with the inherited entries before validating.

        :return: a new ``Object`` holding the validation result; the
            ``Commands`` member when validation yields one; an ``Object``
            with an empty ``{}`` value when validation is not possible or
            fails and ``warning_only`` is set; otherwise False
        """
        result = Object(self.dict_obj, self.inherited_obj, self.common_properties)
        if isinstance(self.dict_obj, dict) and isinstance(meta, dict):
            if self.inherited_obj:
                extended = self.dict_obj.copy()
                extended.update(self.inherited_obj)
                self.dict_obj = extended
            # arg is a new schema to validate the metadata
            result.value = _validate(line, meta, params, self.dict_obj, self.common_properties)
            if result.value is not False and result.value not in Commands:
                return result

        # logger.error(line, 'object property is not a dictionary: ' + str(meta))
        if result.value in Commands:
            return result.value
        if not self.warning_only:
            return False
        logger.warning(line, 'The value of an object property is not a string or object.'
                             ' An object with no properties is returned.', meta)
        result.value = {}
        return result

    def normalize(self, params):
        """Normalize every property; an ``@context`` entry is replaced by the CSVW namespace."""
        for name in self.value:
            self.value[name].normalize(params)

        if '@context' in self.value:
            context = Atomic('http://www.w3.org/ns/csvw')
            context.value = 'http://www.w3.org/ns/csvw'
            self.value['@context'] = context

    def merge(self, obj):
        """Merge the properties of *obj* into this object, in place."""
        for name in obj.value:
            if name not in self.value:
                # if property not in A, just add it
                self.value[name] = obj.value[name]
            else:
                self.value[name].merge(obj.value[name])

    def json(self):
        """Return a dict of the properties' JSON representations."""
        return dict((name, self.value[name].json()) for name in self.value)
379 |
380 |
class Operator(MetaObject):
    """Marker base class for the operator nodes; adds no behaviour."""
383 |
384 |
class BoolOperator(Operator):
    """Marker base class for operators that Or and And test with isinstance; adds no behaviour."""
387 |
388 |
class IsBuiltinDatatype(Operator):
    """Operator accepting only names of built-in datatypes."""

    def __init__(self, warning_only=False):
        self.warning_only = warning_only

    def evaluate(self, meta, params, default=None, line=None):
        """Return *meta* when it names a built-in datatype.

        Any other string yields ``Commands.Remove`` (with a warning) when
        ``warning_only`` is set; everything else yields False.
        """
        if not isinstance(meta, basestring):
            return False
        if built_in_datatypes.is_built_in_datatype(meta):
            return meta
        if self.warning_only:
            logger.warning(line, 'Value is not a built in datatype: ', meta)
            return Commands.Remove
        #TODO logger.error(line, 'Value is not a built in datatype: ', meta)
        return False
402 |
403 |
class OfType(Operator):
    """Operator accepting only instances of ``base_type``."""

    def __init__(self, base_type, warning_only=False):
        self.base_type = base_type
        self.warning_only = warning_only

    def evaluate(self, meta, params, default=None, line=None):
        """Return *meta* when it is an instance of ``base_type``.

        Otherwise return ``Commands.Remove`` (with a warning) when
        ``warning_only`` is set, else False.
        """
        if isinstance(meta, self.base_type):
            return meta
        if not self.warning_only:
            return False
        logger.warning(line, 'Value (1) has to be of type (2): ', meta, self.base_type)
        return Commands.Remove
417 |
418 |
class AllDiff(BoolOperator):
    """Boolean operator checking that the items of a list are pairwise distinct.

    For dict items the value stored under ``arg`` is compared; string items
    are compared directly; any other item contributes ``None``.
    """

    def __init__(self, arg):
        self.arg = arg

    def evaluate(self, meta_list, params, default=None, line=None):
        """Return ``False`` as soon as a truthy value repeats, else ``True``.

        Falsy values (missing key, ``None``, empty string, ...) never count
        as duplicates.
        """
        seen = []
        for item in meta_list:
            if isinstance(item, basestring):
                candidate = item
            elif isinstance(item, dict):
                candidate = item.get(self.arg)
            else:
                candidate = None
            if candidate and candidate in seen:
                return False
            seen.append(candidate)
        return True
436 |
437 |
class Or(Operator):
    """Operator collecting the results of every alternative that accepts a value.

    Alternatives may be operators / meta objects (evaluated) or plain
    constants (compared to the value for equality).
    """

    def __init__(self, *values):
        self.values = list(values)

    def evaluate(self, meta, params, warning_only=False, default=None, line=None):
        """Evaluate ``meta`` against all alternatives.

        Returns ``False`` when a ``BoolOperator`` alternative rejects the
        value and ``Commands.Remove`` as soon as an alternative yields it.
        Otherwise the truthy results are collected; a single result for a
        non-list ``meta`` is returned bare, anything else as a list.

        NOTE(review): the other operators in this module invoke
        ``evaluate(meta, params, default, line)`` positionally. With this
        signature the third positional argument is bound to ``warning_only``
        and the fourth to ``default`` -- confirm this is intended.
        """
        collected = []
        for candidate in self.values:
            outcome = False
            if isinstance(candidate, BoolOperator) and not candidate.evaluate(meta, params, default, line):
                return False
            elif isinstance(candidate, MetaObject):
                outcome = candidate.evaluate(meta, params, default, line)
            elif candidate == meta:
                # constant alternative: wrap the matching literal
                outcome = Atomic(candidate)
                outcome.value = candidate
            if outcome == Commands.Remove:
                return outcome
            if not outcome:
                continue
            if isinstance(outcome, list):
                collected.extend(outcome)
            else:
                collected.append(outcome)

        if warning_only and not collected:
            logger.warning(line, 'Value is not allowed', meta)
            collected = [default] if default else [Commands.Remove]

        # two types of or: on a list or a value
        single_value = not isinstance(meta, list)
        if single_value and len(collected) == 1:
            return collected[0]
        return collected
472 |
473 |
class And(Operator):
    """Operator requiring every operand to accept the value."""

    def __init__(self, *values):
        self.values = list(values)

    def evaluate(self, meta, params, default=None, line=None):
        """Evaluate all operands in order.

        Returns ``False`` at the first operand with a falsy result. Results
        of ``BoolOperator`` operands are only checked; results of all other
        operands are gathered (lists are flattened one level) and returned
        as a list.
        """
        gathered = []
        for operand in self.values:
            outcome = operand.evaluate(meta, params, default, line)
            if not outcome:
                return False
            if isinstance(operand, BoolOperator):
                # pure check, contributes nothing to the result
                continue
            if isinstance(outcome, list):
                gathered.extend(outcome)
            else:
                gathered.append(outcome)
        return gathered
493 |
494 |
class All(Operator):
    """
    All operator
    Takes a type object in the constructor.
    On evaluation, every item of the given list is evaluated with that type.
    """
    def __init__(self, typ, warning_only=False):
        self.typ = typ
        self.warning_only = warning_only

    def evaluate(self, meta_list, params, default=None, line=None):
        """Evaluate each item of ``meta_list`` with ``self.typ``.

        A result that is a member of ``Commands`` is returned immediately.
        An item with a falsy result aborts with ``False`` unless
        ``warning_only`` is set, in which case it is skipped and reported.
        Returns the list of accepted results (lists flattened one level).
        """
        accepted = []
        rejected = []
        for item in meta_list:
            outcome = self.typ.evaluate(item, params, default, line)
            if outcome in Commands:
                return outcome
            if outcome:
                if isinstance(outcome, list):
                    accepted.extend(outcome)
                else:
                    accepted.append(outcome)
            elif self.warning_only:
                rejected.append(str(item))
            else:
                return False

        # The warning is only emitted when at least one item was accepted.
        if accepted and rejected:
            logger.warning(line, 'Any items that are not valid objects '
                                 'of the type expected are ignored: ', rejected)
        return accepted
527 |
528 |
class Some(Operator):
    """
    Some operator
    Takes a type object in the constructor.
    On evaluation, checks that at least one of the given items is accepted
    by that type.
    """
    def __init__(self, typ):
        self.typ = typ

    def evaluate(self, meta_list, params, default=None, line=None):
        """Return the accepted results, or ``False`` if no item is accepted.

        Items with a falsy result are ignored; list results are flattened
        one level into the returned list.
        """
        matches = []
        for item in meta_list:
            outcome = self.typ.evaluate(item, params, default, line)
            if not outcome:
                continue
            if isinstance(outcome, list):
                matches.extend(outcome)
            else:
                matches.append(outcome)
        return matches or False
552 |
553 |
class Selection(Operator):
    """Operator accepting a value when at most one alternative matches it."""

    def __init__(self, *values):
        self.values = list(values)

    def evaluate(self, meta, params, default=None, line=None):
        """Evaluate ``meta`` with every alternative.

        Returns the result of the single matching alternative, ``False``
        when none matches, and ``False`` (with a debug message) as soon as
        a second alternative matches.
        """
        match = False
        for option in self.values:
            outcome = option.evaluate(meta, params, default, line)
            if not outcome:
                continue
            if match:
                # already the second match
                logger.debug(line, '(Selection Operator) Only one match allowed: ', meta)
                return False
            match = outcome
        # if we get here, we found zero or one match
        return match
570 |
class SetOrDefault(Operator):
    """
    Restricts a property to a predefined set of values (used for
    tableDirection, textDirection and trim in this module).
    If the given value is not in the set, then the default value is used.
    If no default value is provided for that property, a warning is generated
    and the property is treated as if it had not been specified.
    """
    def __init__(self, *values):
        self.values = list(values)

    def evaluate(self, meta, params, default=None, line=None):
        """Return ``meta`` if allowed, else ``default``, else ``Commands.Remove``.

        A falsy ``default`` counts as "no default".
        """
        # 'not given' doubles as the "no match" marker.
        chosen = next((meta for allowed in self.values if allowed == meta), 'not given')
        if chosen == 'not given':
            if default:
                return default
            logger.warning(line, 'Unknown value (no default value is provided for that property): ', meta)
            return Commands.Remove
        return chosen
593 |
# Schema of the object form of a datatype's 'format' property (number formats).
# As in every schema table below, each entry maps a property name to:
#   'options' - Option flags checked by _validate (Required / NonEmpty),
#   'type'    - object whose evaluate() validates the raw value,
#   'default' - optional value handed to evaluate() as its default.
FORMAT = {
    'decimalChar': {
        'options': [],
        'type': Atomic(OfType(basestring)),
        'default': '.'
    },
    'groupChar': {
        'options': [],
        'type': Atomic(OfType(basestring)),
        'default': ','
    },
    'pattern': {
        'options': [],
        'type': NumberPattern()
    },
}
610 |
# Schema of a datatype description object (the object form of 'datatype').
# 'format' may be either a string or an object following FORMAT.
DATATYPE = {
    'base': {
        'options': [],
        'type': Atomic(OfType(basestring)),
        'default': 'string'
    },
    'format': {
        # TODO object property
        'options': [],
        'type': Atomic(Or(OfType(basestring), Object(FORMAT)))
    },
    'length': {
        'options': [],
        'type': Atomic(OfType(int))
    },
    'minLength': {
        'options': [],
        'type': Atomic(OfType(int))
    }
    # TODO datatype description object
}
632 |
# Schema of the inherited properties. It is passed as `inherited_obj` to the
# Object(...) definitions for columns, schemas, tables and table groups below.
# Types built with warning_only=True report a bad value as a warning and drop
# the property instead of failing validation.
INHERITED = {
    'aboutUrl': {
        'options': [],
        'type': Uri()
    },
    'datatype': {
        'options': [],
        # either the name of a built-in datatype or an object following DATATYPE
        'type': Atomic(Or(IsBuiltinDatatype(warning_only=True), Object(DATATYPE)))
    },
    'default': {
        'options': [],
        'type': Atomic(OfType(basestring, warning_only=True)),
        'default': ''
    },
    'lang': {
        'options': [],
        'type': Atomic(OfType(basestring)),
        'default': 'und'
    },
    'null': {
        'options': [],
        'type': Atomic(OfType(basestring, warning_only=True)),
        'default': ''
    },
    'ordered': {
        'options': [],
        'type': Atomic(OfType(bool, warning_only=True)),
        'default': False
    },
    'propertyUrl': {
        'options': [],
        'type': Uri()
    },
    'required': {
        'options': [],
        'type': Atomic(OfType(bool)),
        'default': False
    },
    'separator': {
        'options': [],
        'type': Atomic(OfType(basestring, warning_only=True)),
        'default': None
    },
    'textDirection': {
        'options': [],
        'type': Atomic(SetOrDefault('ltr', 'rtl')),
        'default': 'ltr'
    },
    'valueUrl': {
        'options': [],
        'type': Uri()
    }
}
686 |
# Schema of a column description (items of a schema's 'columns' array).
COLUMN = {
    'name': {
        'options': [],
        'type': Atomic(OfType(basestring, warning_only=True))
    },
    'suppressOutput': {
        'options': [],
        'type': Atomic(OfType(bool, warning_only=True)),
        'default': False
    },
    'titles': {
        'options': [],
        'type': NaturalLanguage()
    },
    'virtual': {
        'options': [],
        'type': Atomic(OfType(bool, warning_only=True)),
        'default': False
    },
    '@id': {
        'options': [],
        'type': Link('@id')
    },
    '@type': {
        'options': [],
        # the only accepted literal is 'Column'
        'type': Atomic('Column')
    }
}
715 |
# Schema of a foreign key definition (items of a schema's 'foreignKeys' array).
FOREIGN_KEY = {
    'columnReference': {
        'options': [Option.Required],
        'type': ColumnReference()
    },
    'reference': {
        'options': [],
        'type': Object({
            'resource': {
                'options': [],
                # NOTE(review): every other Link in this module is given the
                # name of its own property; here the key is 'resource' but the
                # argument is 'resources' -- confirm this is intended.
                'type': Link('resources')
            },
            'schemaReference': {
                'options': [],
                'type': Link('schemaReference')
            },
            'columnReference': {
                'options': [Option.Required],
                'type': ColumnReference()
            }
        })
    }
}
739 |
# Schema of a schema description (the value of 'tableSchema').
SCHEMA = {
    'foreignKeys': {
        'options': [],
        'type': Array(All(Object(FOREIGN_KEY)))
    },
    'columns': {
        'options': [],
        # every item must be a valid column description and the truthy
        # 'name' values must be pairwise distinct (AllDiff)
        'type': Array(And(All(Object(COLUMN, inherited_obj=INHERITED, common_properties=True)),
                          AllDiff('name')))
    },
    'primaryKey': {
        'options': [],
        'type': ColumnReference()
    },
    '@id': {
        'options': [],
        'type': Link('@id')
    },
    '@type': {
        'options': [],
        'type': Atomic('Schema')
    }
}
763 |
# Schema of a dialect description (the value of 'dialect'), i.e. the hints for
# parsing the CSV file itself. Most types use warning_only=True, so a value of
# the wrong type is reported and the property dropped.
DIALECT = {
    'encoding': {
        'options': [],
        'type': Atomic(OfType(basestring)),
        'default': 'utf-8'
    },
    'lineTerminators': {
        'options': [],
        'type': Atomic(OfType(list, warning_only=True)),
        'default': ["\r\n", "\n"]
    },
    'quoteChar': {
        'options': [],
        # a string, or the constant None
        'type': Atomic(Or(OfType(basestring), None)),
        'default': '"'
    },
    'doubleQuote': {
        'options': [],
        'type': Atomic(OfType(bool, warning_only=True)),
        'default': True
    },
    'skipRows': {
        'options': [],
        'type': Atomic(OfType(int, warning_only=True)),
        'default': 0
    },
    'commentPrefix': {
        'options': [],
        'type': Atomic(OfType(basestring, warning_only=True)),
        'default': '#'
    },
    'header': {
        'options': [],
        'type': Atomic(OfType(bool, warning_only=True)),
        'default': True
    },
    'headerRowCount': {
        'options': [],
        'type': Atomic(OfType(int, warning_only=True)),
        'default': 1
    },
    'delimiter': {
        'options': [],
        'type': Atomic(OfType(basestring, warning_only=True)),
        'default': ','
    },
    'skipColumns': {
        'options': [],
        'type': Atomic(OfType(int, warning_only=True)),
        'default': 0
    },
    'skipBlankRows': {
        'options': [],
        'type': Atomic(OfType(bool, warning_only=True)),
        'default': False
    },
    'skipInitialSpace': {
        'options': [],
        'type': Atomic(OfType(bool, warning_only=True)),
        'default': False
    },
    'trim': {
        'options': [],
        'type': Atomic(SetOrDefault(True, False, 'start', 'end')),
        # NOTE(review): the default is the string 'false', which is not one of
        # the values accepted by the SetOrDefault above -- confirm intended.
        'default': 'false'
    },
    '@id': {
        'options': [],
        'type': Link('@id')
    },
    '@type': {
        'options': [],
        'type': Atomic('Dialect')
    }
}
839 |
840 |
841 |
# Schema of a transformation definition (items of a 'transformations' array).
# 'url', 'targetFormat' and 'scriptFormat' are mandatory.
TRANSFORMATION = {
    'url': {
        'options': [Option.Required],
        'type': Link('url')
    },
    'targetFormat': {
        'options': [Option.Required],
        'type': Link('targetFormat')
    },
    'scriptFormat': {
        'options': [Option.Required],
        'type': Link('scriptFormat')
    },
    'titles': {
        'options': [],
        'type': NaturalLanguage()
    },
    'source': {
        'options': [],
        'type': Atomic(OfType(basestring))
    },
    '@id': {
        'options': [],
        'type': Link('@id')
    },
    '@type': {
        'options': [],
        # the accepted literal is 'Template'
        'type': Atomic('Template')
    }
}
872 |
873 |
# Type of the '@context' property. Exactly one of two forms must match:
#   * the bare CSVW namespace string, or
#   * an array containing the namespace string and at least one item accepted
#     by Atomic(Base()) or Atomic(Language()).
CONTEXT = Selection(Atomic('http://www.w3.org/ns/csvw'),
                    Array(And(Some(Atomic('http://www.w3.org/ns/csvw')),
                              Some(Or(Atomic(Base()), Atomic(Language()))))
                          )
                    )
879 |
# Schema of a table description (items of a table group's 'tables' array).
# Only 'url' is mandatory.
TABLE = {
    'url': {
        'options': [Option.Required],
        'type': Link('url')
    },
    'transformations': {
        'options': [],
        'type': Array(All(Object(TRANSFORMATION), warning_only=True), warning_only=True)
    },
    'tableDirection': {
        'options': [],
        'type': Atomic(SetOrDefault('rtl', 'ltr', 'default')),
        'default': 'default'
    },
    'tableSchema': {
        'options': [],
        'type': Object(SCHEMA, inherited_obj=INHERITED, common_properties=True)
    },
    'dialect': {
        'options': [],
        'type': Object(DIALECT, warning_only=True)
    },
    'notes': {
        'options': [],
        # objects without a fixed schema; only common properties are validated
        'type': Array(All(Object({}, common_properties=True)))
    },
    'suppressOutput': {
        'options': [],
        'type': Atomic(OfType(bool, warning_only=True)),
        'default': False
    },
    '@id': {
        'options': [],
        'type': Link('@id')
    },
    '@type': {
        'options': [],
        'type': Atomic('Table')
    },
    '@context': {
        'options': [],
        'type': CONTEXT
    }
}
924 |
925 |
# Schema of a table group description; this is the root schema used by
# validate(). '@context' is mandatory and 'tables' is mandatory and non-empty.
TABLE_GROUP = {
    '@context': {
        'options': [Option.Required],
        'type': CONTEXT
    },
    'tables': {
        'options': [Option.Required, Option.NonEmpty],
        'type': Array(All(Object(TABLE, inherited_obj=INHERITED, common_properties=True), warning_only=True))
    },
    'transformations': {
        'options': [],
        'type': Array(All(Object(TRANSFORMATION)))
    },
    'tableDirection': {
        'options': [],
        'type': Atomic(SetOrDefault('rtl', 'ltr', 'default')),
        'default': 'default'
    },
    'tableSchema': {
        'options': [],
        'type': Object(SCHEMA, inherited_obj=INHERITED, common_properties=True)
    },
    'dialect': {
        'options': [],
        'type': Object(DIALECT, warning_only=True)
    },
    'notes': {
        'options': [],
        'type': Array(Property())
    },
    '@id': {
        'options': [],
        'type': Link('@id')
    },
    '@type': {
        'options': [],
        'type': Atomic('TableGroup')
    },
}
965 |
966 |
967 | def _validate(line, meta, params, schema, common_properties):
968 | model = {}
969 | remove_props = []
970 | default = None
971 | for prop in meta:
972 | value = meta[prop]
973 | if prop in schema:
974 | opts = schema[prop]['options']
975 | t = schema[prop]['type']
976 | # check for default value
977 | if 'default' in schema[prop]:
978 | default = schema[prop]['default']
979 | # check if not empty
980 | if value != None:
981 | prop_eval = t.evaluate(value, params, default, line)
982 | if prop_eval == Commands.Remove:
983 | remove_props.append(prop)
984 | elif prop_eval == Commands.Error:
985 | return prop_eval
986 | elif not prop_eval:
987 | return False
988 | model[prop] = prop_eval
989 | elif Option.NonEmpty in opts:
990 | logger.debug(line, 'Property is empty: ', prop)
991 | if prop == 'tables':
992 | logger.error(line, 'array does not contain one or more "table descriptions"')
993 | return False
994 | elif common_properties and is_common_property(prop):
995 | prop_eval = Common(prop).evaluate(value, params, default, line)
996 | if not prop_eval:
997 | return False
998 | model[prop] = prop_eval
999 | else:
1000 | logger.warning(line, 'Unknown property: ', prop)
1001 | model[prop] = Atomic(prop)
1002 | # check for missing props
1003 | for prop in schema:
1004 | if Option.Required in schema[prop]['options'] and prop not in meta:
1005 | logger.error(line, 'Property missing: ', prop)
1006 | return False
1007 | # remove props with warnings
1008 | for prop in remove_props:
1009 | del model[prop]
1010 | return model
1011 |
1012 |
def validate(metadata):
    """Validate a raw metadata dict and wrap the result in a ``Model``.

    The input is first passed through ``expand`` so that it has the shape of
    a table group, and is then evaluated against ``TABLE_GROUP``.

    :return: a ``Model`` holding the validated object and the params dict
        that was handed to the evaluation, or ``False`` if validation failed
    """
    params = {}
    # Only the table-group form is evaluated; expand() wraps other input.
    root = Object(TABLE_GROUP, inherited_obj=INHERITED, common_properties=True)
    validated = root.evaluate(expand(metadata), params)
    # TODO look for language, column references, ...
    if not validated or validated == Commands.Error:
        return False
    return Model(validated, params)
1023 |
def expand(meta):
    """Return ``meta`` in the shape of a table group description.

    A dict that already has a ``tables`` property is returned unchanged (same
    object). Any other dict is treated as a single table description: it is
    wrapped as the only entry of a new ``tables`` array, and a truthy
    ``@context`` is moved from the table to the new top level.

    The caller's dict is never modified; the wrapped table is a shallow copy.
    """
    if 'tables' in meta:
        return meta
    # turn into table group description
    table = dict(meta)
    context = table.pop('@context', None)
    group = {'tables': [table]}
    if context:
        group['@context'] = context
    return group
1033 |
1034 |
class Model:
    """Validated metadata object together with the params used to validate it."""

    def __init__(self, obj, params):
        # root of the validated metadata object tree
        self.object = obj
        # params dict that was passed to the validation
        self.params = params

    def normalize(self):
        """Normalize the wrapped object in place, using the stored params."""
        root = self.object
        root.normalize(self.params)

    def merge(self, B):
        """Merge the object wrapped by model ``B`` into this model's object."""
        incoming = B.object
        self.object.merge(incoming)

    def json(self):
        """Return whatever the wrapped object's ``json()`` produces."""
        root = self.object
        return root.json()
1048 |
1049 |
def normalize(metadata):
    """
    Validate ``metadata`` and, if it is valid, normalize the resulting model in place.

    Returns the normalized model, or the falsy result of ``validate`` when validation failed.

    The normalization rules that the model objects are meant to follow:

    1) If the property is a common property or notes the value must be normalized as follows:
       1.1) If the value is an array, each value within the array is normalized in place as described here.
       1.2) If the value is a string, replace it with an object with a @value property whose value is that string. If a default language is specified, add a @language property whose value is that default language.
       1.3) If the value is an object with a @value property, it remains as is.
       1.4) If the value is any other object, normalize each property of that object as follows:
            1.4.1) If the property is @id, expand any prefixed names and resolve its value against the base URL.
            1.4.2) If the property is @type, then its value remains as is.
            1.4.3) Otherwise, normalize the value of the property as if it were a common property, according to this algorithm.
       1.5) Otherwise, the value remains as is.
    2) If the property is an array property each element of the value is normalized using this algorithm.
    3) If the property is a link property the value is turned into an absolute URL using the base URL.
    4) If the property is an object property with a string value, the string is a URL referencing a JSON document containing a single object. Fetch this URL to retrieve an object, which may have a local @context. Raise an error if fetching this URL does not result in a JSON object. Normalize each property in the resulting object recursively using this algorithm and with its local @context then remove the local @context property. If the resulting object does not have an @id property, add an @id whose value is the original URL. This object becomes the value of the original object property.
    5) If the property is an object property with an object value, normalize each property recursively using this algorithm.
    6) If the property is a natural language property and the value is not already an object, it is turned into an object whose properties are language codes and where the values of those properties are arrays. The suitable language code for the values is determined through the default language; if it can't be determined the language code und must be used.
    7) If the property is an atomic property that can be a string or an object, normalize to the object form as described for that property.

    Following this normalization process, the @base and @language properties within the @context are no longer relevant; the normalized metadata can have its @context set to http://www.w3.org/ns/csvw.
    """
    model = validate(metadata)
    if not model:
        return model
    model.normalize()
    return model
1073 |
1074 |
def merge(meta_sources):
    """
    Normalize all metadata sources and merge them into a single model.

    The sources are expected from highest priority to lowest priority; every
    later source is merged into the first one.

    Returns the merged model, or None if ``meta_sources`` is empty.
    Raises ValidationException if a source fails validation.
    """
    # First pass: normalize (and thereby validate) every source. This happens
    # for all sources before any merging starts.
    normalized = []
    for source in meta_sources:
        model = normalize(source)
        if not model:
            raise ValidationException('validation failed for metadata: ' + str(source))
        normalized.append(model)

    # Second pass: fold all models into the first one.
    merged = None
    for model in normalized:
        if not model:
            # not a valid metadata object
            continue
        if not merged:
            # first iteration
            merged = model
        else:
            merged.merge(model)
    return merged
1101 |
1102 |
1103 |
--------------------------------------------------------------------------------
/pycsvw/metadata_extractor.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import urllib2
3 | import os
4 | import simplejson
5 | import metadata
6 |
7 | __author__ = 'sebastian'
8 | logger = logging.getLogger(__name__)
9 |
10 | # 1. command-line option
11 | # 2. metadata embedded within the tabular data file itself
12 | # 3. metadata in a document linked to using a Link header associated with the tabular data file
13 | HEADER_LINK = ['link', 'Link']
14 | # 4. file-specific metadata in a document located based on the location of the tabular data file
15 | FILE_SPECIFIC_METADATA = '-metadata.json'
16 | # 5. directory-specific metadata in a document located based on the location of the tabular data file
17 | DIRECTORY_METADATA = ['metadata.json', 'csv-metadata.json']
18 |
19 |
def parse_to_json(metadata_handle):
    """Parse the JSON document readable from ``metadata_handle`` and return it.

    The result is the plain deserialized structure; it is not validated here.
    """
    return simplejson.load(metadata_handle)
24 |
25 |
26 | def _parse_header_field(header_field):
27 | raise NotImplementedError()
28 |
29 |
def _append_remote_metadata(meta_url, meta_sources, description):
    """Fetch ``meta_url`` and append its parsed JSON to ``meta_sources``.

    Returns True if the document was retrieved with status 200 and appended,
    False otherwise (including any ``urllib2.URLError``). The HTTP response
    is always closed.
    """
    response = None
    try:
        response = urllib2.urlopen(meta_url)
        if response.getcode() == 200:
            logger.debug('found %s metadata: %s', description, meta_url)
            meta_sources.append(parse_to_json(response))
            return True
    except urllib2.URLError:
        pass
    finally:
        if response is not None:
            response.close()
    return False


def metadata_extraction(url, metadata_handle, embedded_metadata=False):
    """Collect the metadata documents that apply to a tabular data file.

    Sources are appended in this order:

    1. metadata read from ``metadata_handle`` (command-line option),
    2. ``embedded_metadata``, if truthy,
    3. metadata linked through an HTTP Link header of ``url``,
    4. file-specific metadata at ``url + '-metadata.json'``,
    5. directory-specific metadata next to ``url`` (first hit of
       ``DIRECTORY_METADATA``).

    Steps 3-5 are skipped when ``url`` is falsy; URLs that cannot be fetched
    are silently ignored.

    :return: list of the metadata documents found
    """
    import posixpath

    meta_sources = []

    # case 1
    if metadata_handle is not None:
        meta_sources.append(parse_to_json(metadata_handle))

    # case 2
    if embedded_metadata:
        meta_sources.append(embedded_metadata)

    if not url:
        return meta_sources

    # case 3
    response = None
    try:
        response = urllib2.urlopen(url)
        header = response.info()
        if header is not None:
            for link in HEADER_LINK:
                if link in header:
                    header_field = header[link]
                    logger.debug('found link in http header: %s', header_field)
                    meta_sources.append(_parse_header_field(header_field))
                    # urllib2's header object matches names case-insensitively,
                    # so the other spelling would hit the same header again.
                    break
    except urllib2.URLError:
        pass
    except NotImplementedError:
        # Link header parsing is still a stub; do not let it abort the
        # remaining lookups.
        logger.warning('ignoring Link header of %s: parsing is not implemented', url)
    finally:
        if response is not None:
            response.close()

    # case 4
    _append_remote_metadata(url + FILE_SPECIFIC_METADATA, meta_sources, 'file specific')

    # case 5
    # split away the part after the last slash
    directory = url.rsplit('/', 1)[0]
    for dir_meta in DIRECTORY_METADATA:
        # URLs always use '/', independent of the local platform
        meta_url = posixpath.join(directory, dir_meta)
        if _append_remote_metadata(meta_url, meta_sources, 'directory specific'):
            break

    return meta_sources
80 |
--------------------------------------------------------------------------------
/pycsvw/parser_exceptions.py:
--------------------------------------------------------------------------------
1 | __author__ = 'sebastian'
2 |
3 |
class ValidationException(Exception):
    """Raised when a metadata document fails validation."""
6 |
class ParserException(Exception):
    """Error type for parsing failures (no raise site is visible in this module)."""
--------------------------------------------------------------------------------
/pycsvw/test.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pprint
3 | from cStringIO import StringIO
4 | from pycsvw import CSVW
5 |
6 |
def test():
    """Manual smoke test: parse an in-memory CSV with in-memory metadata.

    Builds a small tree-operations CSV and a matching metadata document,
    feeds both to ``CSVW`` and pretty-prints the parsed columns, cells, rows
    and the metadata JSON. Nothing is asserted; the output has to be
    inspected by hand.
    """

    # CSV fixture: one header row followed by three data rows
    t1 = 'GID,On Street,Species,Trim Cycle,Diameter at Breast Ht,Inventory Date,Comments,Protected,KML\n' \
         '1,ADDISON AV,Celtis australis,Large Tree Routine Prune,11,10/18/2010,,,"-122.156485,37.440963"\n' \
         '2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,11,6/2/2010,,,"-122.156749,37.440958"\n' \
         '6,ADDISON AV,Robinia pseudoacacia,Large Tree Routine Prune,29,6/1/2010,cavity or decay; trunk decay; codominant leaders; included bark; large leader or limb decay; previous failure root damage; root decay; beware of BEES,YES,"-122.156299,37.441151"'

    # Metadata fixture: a single table description (no 'tables' property)
    # with common properties, notes, a dialect and a schema with nine columns
    m1_dict = {
        "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}],
        "@id": "http://example.org/tree-ops-ext",
        "url": "tree-ops-ext.csv",
        "dc:title": "Tree Operations",
        "dcat:keyword": ["tree", "street", "maintenance"],
        "dc:publisher": [{
            "schema:name": "Example Municipality",
            "schema:url": {"@id": "http://example.org"}
        }],
        "dc:license": {"@id": "http://opendefinition.org/licenses/cc-by/"},
        "dc:modified": {"@value": "2010-12-31", "@type": "xsd:date"},
        "notes": [{
            "@type": "oa:Annotation",
            "oa:hasTarget": {"@id": "http://example.org/tree-ops-ext"},
            "oa:hasBody": {
                "@type": "oa:EmbeddedContent",
                "rdf:value": "This is a very interesting comment about the table; it's a table!",
                "dc:format": {"@value": "text/plain"}
            }
        }],
        "dialect": {"trim": True},
        "tableSchema": {
            "columns": [{
                "name": "GID",
                "titles": [
                    "GID",
                    "Generic Identifier"
                ],
                "dc:description": "An identifier for the operation on a tree.",
                "datatype": "string",
                "required": True,
                "suppressOutput": True
            }, {
                "name": "on_street",
                "titles": "On Street",
                "dc:description": "The street that the tree is on.",
                "datatype": "string"
            }, {
                "name": "species",
                "titles": "Species",
                "dc:description": "The species of the tree.",
                "datatype": "string"
            }, {
                "name": "trim_cycle",
                "titles": "Trim Cycle",
                "dc:description": "The operation performed on the tree.",
                "datatype": "string",
                "lang": "en"
            }, {
                "name": "dbh",
                "titles": "Diameter at Breast Ht",
                "dc:description": "Diameter at Breast Height (DBH) of the tree (in feet), measured 4.5ft above ground.",
                "datatype": "integer"
            }, {
                "name": "inventory_date",
                "titles": "Inventory Date",
                "dc:description": "The date of the operation that was performed.",
                "datatype": {"base": "date", "format": "M/d/yyyy"}
            }, {
                "name": "comments",
                "titles": "Comments",
                "dc:description": "Supplementary comments relating to the operation or tree.",
                "datatype": "string",
                "separator": ";"
            }, {
                "name": "protected",
                "titles": "Protected",
                "dc:description": "Indication (YES / NO) whether the tree is subject to a protection order.",
                "datatype": {"base": "boolean", "format": "YES|NO"},
                "default": "NO"
            }, {
                "name": "kml",
                "titles": "KML",
                "dc:description": "KML-encoded description of tree location.",
                "datatype": "xml"
            }],
            "primaryKey": "GID",
            "aboutUrl": "http://example.org/tree-ops-ext#gid-{GID}"
        }
    }
    # both inputs are handed over as file-like objects
    m1 = StringIO(json.dumps(m1_dict))
    f = StringIO(t1)
    csvw = CSVW(handle=f, metadata_handle=m1, url='http://example.org/tree-ops-ext.csv')
    # dump every parsed column with its cells
    for col in csvw.table.columns:
        pprint.pprint(col.name)
        pprint.pprint(col.titles)
        pprint.pprint(col.cells)
        for c in col.cells:
            pprint.pprint(c.value)
    pprint.pprint(csvw.table.rows)

    pprint.pprint(csvw.metadata.json())

    # the return value of the JSON conversion is not inspected
    csvw.to_json()
109 |
110 |
111 |
# Run the manual smoke test when this file is executed as a script.
if __name__ == '__main__':
    test()
--------------------------------------------------------------------------------
/pycsvw/testdata/csvw-template/LICENSE:
--------------------------------------------------------------------------------
1 | Creative Commons Legal Code
2 |
3 | CC0 1.0 Universal
4 |
5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
12 | HEREUNDER.
13 |
14 | Statement of Purpose
15 |
16 | The laws of most jurisdictions throughout the world automatically confer
17 | exclusive Copyright and Related Rights (defined below) upon the creator
18 | and subsequent owner(s) (each and all, an "owner") of an original work of
19 | authorship and/or a database (each, a "Work").
20 |
21 | Certain owners wish to permanently relinquish those rights to a Work for
22 | the purpose of contributing to a commons of creative, cultural and
23 | scientific works ("Commons") that the public can reliably and without fear
24 | of later claims of infringement build upon, modify, incorporate in other
25 | works, reuse and redistribute as freely as possible in any form whatsoever
26 | and for any purposes, including without limitation commercial purposes.
27 | These owners may contribute to the Commons to promote the ideal of a free
28 | culture and the further production of creative, cultural and scientific
29 | works, or to gain reputation or greater distribution for their Work in
30 | part through the use and efforts of others.
31 |
32 | For these and/or other purposes and motivations, and without any
33 | expectation of additional consideration or compensation, the person
34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she
35 | is an owner of Copyright and Related Rights in the Work, voluntarily
36 | elects to apply CC0 to the Work and publicly distribute the Work under its
37 | terms, with knowledge of his or her Copyright and Related Rights in the
38 | Work and the meaning and intended legal effect of CC0 on those rights.
39 |
40 | 1. Copyright and Related Rights. A Work made available under CC0 may be
41 | protected by copyright and related or neighboring rights ("Copyright and
42 | Related Rights"). Copyright and Related Rights include, but are not
43 | limited to, the following:
44 |
45 | i. the right to reproduce, adapt, distribute, perform, display,
46 | communicate, and translate a Work;
47 | ii. moral rights retained by the original author(s) and/or performer(s);
48 | iii. publicity and privacy rights pertaining to a person's image or
49 | likeness depicted in a Work;
50 | iv. rights protecting against unfair competition in regards to a Work,
51 | subject to the limitations in paragraph 4(a), below;
52 | v. rights protecting the extraction, dissemination, use and reuse of data
53 | in a Work;
54 | vi. database rights (such as those arising under Directive 96/9/EC of the
55 | European Parliament and of the Council of 11 March 1996 on the legal
56 | protection of databases, and under any national implementation
57 | thereof, including any amended or successor version of such
58 | directive); and
59 | vii. other similar, equivalent or corresponding rights throughout the
60 | world based on applicable law or treaty, and any national
61 | implementations thereof.
62 |
63 | 2. Waiver. To the greatest extent permitted by, but not in contravention
64 | of, applicable law, Affirmer hereby overtly, fully, permanently,
65 | irrevocably and unconditionally waives, abandons, and surrenders all of
66 | Affirmer's Copyright and Related Rights and associated claims and causes
67 | of action, whether now known or unknown (including existing as well as
68 | future claims and causes of action), in the Work (i) in all territories
69 | worldwide, (ii) for the maximum duration provided by applicable law or
70 | treaty (including future time extensions), (iii) in any current or future
71 | medium and for any number of copies, and (iv) for any purpose whatsoever,
72 | including without limitation commercial, advertising or promotional
73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
74 | member of the public at large and to the detriment of Affirmer's heirs and
75 | successors, fully intending that such Waiver shall not be subject to
76 | revocation, rescission, cancellation, termination, or any other legal or
77 | equitable action to disrupt the quiet enjoyment of the Work by the public
78 | as contemplated by Affirmer's express Statement of Purpose.
79 |
80 | 3. Public License Fallback. Should any part of the Waiver for any reason
81 | be judged legally invalid or ineffective under applicable law, then the
82 | Waiver shall be preserved to the maximum extent permitted taking into
83 | account Affirmer's express Statement of Purpose. In addition, to the
84 | extent the Waiver is so judged Affirmer hereby grants to each affected
85 | person a royalty-free, non transferable, non sublicensable, non exclusive,
86 | irrevocable and unconditional license to exercise Affirmer's Copyright and
87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the
88 | maximum duration provided by applicable law or treaty (including future
89 | time extensions), (iii) in any current or future medium and for any number
90 | of copies, and (iv) for any purpose whatsoever, including without
91 | limitation commercial, advertising or promotional purposes (the
92 | "License"). The License shall be deemed effective as of the date CC0 was
93 | applied by Affirmer to the Work. Should any part of the License for any
94 | reason be judged legally invalid or ineffective under applicable law, such
95 | partial invalidity or ineffectiveness shall not invalidate the remainder
96 | of the License, and in such case Affirmer hereby affirms that he or she
97 | will not (i) exercise any of his or her remaining Copyright and Related
98 | Rights in the Work or (ii) assert any associated claims and causes of
99 | action with respect to the Work, in either case contrary to Affirmer's
100 | express Statement of Purpose.
101 |
102 | 4. Limitations and Disclaimers.
103 |
104 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
105 | surrendered, licensed or otherwise affected by this document.
106 | b. Affirmer offers the Work as-is and makes no representations or
107 | warranties of any kind concerning the Work, express, implied,
108 | statutory or otherwise, including without limitation warranties of
109 | title, merchantability, fitness for a particular purpose, non
110 | infringement, or the absence of latent or other defects, accuracy, or
111 | the present or absence of errors, whether or not discoverable, all to
112 | the greatest extent permissible under applicable law.
113 | c. Affirmer disclaims responsibility for clearing rights of other persons
114 | that may apply to the Work or any use thereof, including without
115 | limitation any person's Copyright and Related Rights in the Work.
116 | Further, Affirmer disclaims responsibility for obtaining any necessary
117 | consents, permissions or other rights required for any use of the
118 | Work.
119 | d. Affirmer understands and acknowledges that Creative Commons is not a
120 | party to this document and has no duty or obligation with respect to
121 | this CC0 or use of the Work.
122 |
--------------------------------------------------------------------------------
/pycsvw/testdata/csvw-template/README.md:
--------------------------------------------------------------------------------
1 | This is a simple example of using [CSV on the Web] to document the semantics
2 | of a CSV file. Fork it, and change it for your CSV data. With apologies
3 | to [Dan Bricklin].
4 |
5 | [CSV on the Web]: http://www.w3.org/2013/csvw/wiki/Main_Page
6 | [Dan Bricklin]: https://en.wikipedia.org/wiki/Dan_Bricklin
7 |
--------------------------------------------------------------------------------
/pycsvw/testdata/csvw-template/example.csv:
--------------------------------------------------------------------------------
1 | isbn,title,author
2 | "0470402377","Bricklin on Technology","Dan Bricklin"
3 |
--------------------------------------------------------------------------------
/pycsvw/testdata/csvw-template/example.csv-metadata.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": {
3 | "@vocab": "http://www.w3.org/ns/csvw#",
4 | "dc": "http://purl.org/dc/terms/"
5 | },
6 | "@type": "Table",
7 | "url": "example.csv",
8 | "dc:creator": "Dan Bricklin",
9 | "dc:title": "My Spreadsheet",
10 | "dc:modified": "2014-05-09T15:44:58Z",
11 | "dc:publisher": "My Books",
12 | "tableSchema": {
13 | "aboutUrl": "http://librarything.com/isbn/{isbn}",
14 | "primaryKey": "isbn",
15 | "columns": [
16 | {
17 | "name": "isbn",
18 | "titles": "ISBN-10",
19 | "datatype": "string",
20 | "unique": true,
21 | "propertyUrl": "http://purl.org/dc/terms/identifier"
22 | },
23 | {
24 | "name": "title",
25 | "titles": "Book Title",
26 | "datatype": "string",
27 | "propertyUrl": "http://purl.org/dc/terms/title"
28 | },
29 | {
30 | "name": "author",
31 | "titles": "Book Author",
32 | "datatype": "string",
33 | "propertyUrl": "http://purl.org/dc/terms/creator"
34 | }
35 | ]
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/pycsvw/testdata/test124-user-metadata.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": "http://www.w3.org/ns/csvw",
3 | "rdfs:comment": "If not validating, and one schema has a name property but not a titles property, and the other has a titles property but not a name property.",
4 | "url": "tree-ops.csv",
5 | "tableSchema": {
6 | "columns": [
7 | {"name": "GID1"},
8 | {"name": "on_street1"},
9 | {"name": "species1"},
10 | {"name": "trim_cycle1"},
11 | {"name": "inventory_date1"}
12 | ]
13 | }
14 | }
--------------------------------------------------------------------------------
/pycsvw/testdata/test125-metadata.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": "http://www.w3.org/ns/csvw",
3 | "rdfs:comment": "If the column required annotation is true, add an error to the list of errors for the cell.",
4 | "tables": [{
5 | "url": "test125.csv",
6 | "tableSchema": {
7 | "columns": [{
8 | "name": "countryCode",
9 | "titles": "countryCode",
10 | "required": true
11 | }, {
12 | "name": "latitude",
13 | "titles": "latitude",
14 | "datatype": "number",
15 | "required": true
16 | }, {
17 | "name": "longitude",
18 | "titles": "longitude",
19 | "datatype": "number",
20 | "required": true
21 | }, {
22 | "name": "name",
23 | "titles": "name",
24 | "datatype": "string",
25 | "required": true
26 | }]
27 | }
28 | }]
29 | }
30 |
--------------------------------------------------------------------------------
/pycsvw/testdata/test125.csv:
--------------------------------------------------------------------------------
1 | countryCode,latitude,longitude,name
2 | AD,42.546245,1.601554,Andorra
3 | AE,23.424076,53.847818,"United Arab Emirates"
4 | AF,,67.709953,Afghanistan
5 |
--------------------------------------------------------------------------------
/pycsvw/testdata/test234-metadata.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": "http://www.w3.org/ns/csvw",
3 | "rdfs:comment": "Validators MUST raise errors if there is more than one row with the same primary key.",
4 | "rdfs:label": "multiple column primaryKey violation",
5 | "url": "test234.csv",
6 | "tableSchema": {
7 | "columns": [
8 | {"name": "PK1", "titles": "PK1"},
9 | {"name": "PK2", "titles": "PK2"}
10 | ],
11 | "primaryKey": ["PK1", "PK2"]
12 | }
13 | }
--------------------------------------------------------------------------------
/pycsvw/testdata/test234.csv:
--------------------------------------------------------------------------------
1 | PK1,PK2
2 | foo,bar
3 | foo,bar
4 |
--------------------------------------------------------------------------------
/pycsvw/testdata/tree-ops.csv:
--------------------------------------------------------------------------------
1 | GID,On Street,Species,Trim Cycle,Inventory Date
2 | 1,ADDISON AV,Celtis australis,Large Tree Routine Prune,10/18/2010
3 | 2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010
4 |
--------------------------------------------------------------------------------
/pycsvw/testdata/validate-result-missing-column.txt:
--------------------------------------------------------------------------------
1 | Column: GID1 defined in schema, but not found in csv table!
2 | Column: on_street1 defined in schema, but not found in csv table!
3 | Column: species1 defined in schema, but not found in csv table!
4 | Column: trim_cycle1 defined in schema, but not found in csv table!
5 | Column: inventory_date1 defined in schema, but not found in csv table!
6 |
7 |
--------------------------------------------------------------------------------
/pycsvw/testdata/validate-result-primary-key-fail.txt:
--------------------------------------------------------------------------------
1 | Error in Row 2: duplicated value: ('foo', 'bar') for primary key columns: ['PK1', 'PK2']
2 |
--------------------------------------------------------------------------------
/pycsvw/testdata/validate-result-required-fail.txt:
--------------------------------------------------------------------------------
1 | Error in Cell (Row 3 Column 2): Column latitude is required!
2 |
3 |
--------------------------------------------------------------------------------
/pycsvw/validator.py:
--------------------------------------------------------------------------------
1 | import ntpath
2 | import simplejson
3 | import csv_parser
4 |
def validate_file(csv_file_path, schema_file_path):
    """Validate the CSV file at ``csv_file_path`` against a schema file.

    Both files are opened in binary mode and passed to ``validate_handle``
    together with the base name of the CSV path.

    Returns whatever ``validate_handle`` returns.
    """
    csv_file_name = ntpath.basename(csv_file_path)
    # Context managers guarantee both handles are closed, also when the
    # second open() or the validation itself raises.
    with open(csv_file_path, 'rb') as csv_handle:
        with open(schema_file_path, 'rb') as schema_handle:
            return validate_handle(csv_handle, csv_file_name, schema_handle)
10 |
11 |
def validate_handle(csv_handle, csv_file_name, schema_handle):
    """Validate an open CSV handle against an open JSON schema handle.

    The CSV is parsed with ``csv_parser.parse`` and the schema is loaded
    with ``simplejson``.  When the schema has a ``"tables"`` entry, the
    first table whose ``"url"`` equals ``csv_file_name`` is used; otherwise
    the schema document itself is taken as the table description.

    Returns a ``(valid, error_message)`` pair: a failure when no table
    description is found, the column-name result when that check fails,
    and the result of ``validate_table_data`` otherwise.
    """
    table, embedded_schema = csv_parser.parse(csv_handle, None)
    schema = simplejson.load(schema_handle)

    if "tables" in schema:
        # First table description that points at this CSV file, if any.
        table_description = next(
            (entry for entry in schema["tables"]
             if "url" in entry and entry["url"] == csv_file_name),
            None)
    else:
        table_description = schema

    if not table_description:
        return (False, "Could not find schema for table %s: " % csv_file_name)

    valid, error_message = validate_columns_name(embedded_schema, table_description)
    if not valid:
        return valid, error_message
    return validate_table_data(table, table_description)
33 |
def _title_candidates(titles):
    """Flatten a ``titles`` value (string, list or language map) to a list."""
    if isinstance(titles, dict):
        flattened = []
        for value in titles.values():
            flattened.extend(_title_candidates(value))
        return flattened
    if isinstance(titles, (type(""), type(u""))):
        # A bare string is one title; without this, ``in`` would do a
        # substring search ("GID" would match "GID1").
        return [titles]
    return list(titles or ())


def validate_columns_name(embedded_schema, schema):
    """Check the schema's column names against the titles found in the CSV.

    ``embedded_schema`` and ``schema`` must both provide
    ``["tableSchema"]["columns"]``.  Columns are compared by position: a
    schema column that has a ``"name"`` must find that exact name among
    the titles of the CSV column at the same index.  Schema columns
    without a ``"name"`` are not checked.

    Returns ``(valid, error_message)``.  A differing column count is
    reported immediately; otherwise one line is emitted per missing column.
    """
    columns_in_table = embedded_schema["tableSchema"]["columns"]
    columns_in_schema = schema["tableSchema"]["columns"]

    if len(columns_in_schema) != len(columns_in_table):
        error_message = "Column number mismatch! Csv has %s columns, but schema has %s columns.\n" % (len(columns_in_table), len(columns_in_schema))
        return (False, error_message)

    errors = []
    for column, table_column in zip(columns_in_schema, columns_in_table):
        if "name" not in column:
            continue
        if column["name"] not in _title_candidates(table_column.get("titles")):
            errors.append("Column: %s defined in schema, but not found in csv table!\n" % column["name"])

    return (not errors, "".join(errors))
50 |
def validate_table_data(table, schema):
    """Check the ``required`` and ``primaryKey`` constraints for every row.

    ``table`` must expose ``rows``; each row exposes ``cells`` and each
    cell a ``value``.  ``schema`` must provide ``["tableSchema"]["columns"]``
    and may provide ``["tableSchema"]["primaryKey"]`` as a single column
    name or a list of names.

    Returns ``(valid, error_message)``, with one line per violation.
    """
    table_schema = schema["tableSchema"]
    columns_in_schema = table_schema["columns"]

    pk_json = table_schema.get("primaryKey")
    if not pk_json:
        pk_column_list = []
    elif isinstance(pk_json, list):
        pk_column_list = pk_json
    else:
        pk_column_list = [pk_json]
    pk_column_index_list = get_column_index(columns_in_schema, pk_column_list)

    errors = []
    pk_value_set = set()
    for row in table.rows:
        # check required
        for i, cell in enumerate(row.cells):
            if cell.value:
                continue
            if i >= len(columns_in_schema):
                # Surplus cell without a column description: nothing can
                # be required of it (this used to raise IndexError).
                continue
            column = columns_in_schema[i]
            # Only a literal ``true`` counts, not an arbitrary truthy value.
            if column.get("required") == True:  # noqa: E712
                # Fall back to the titles so a nameless column cannot
                # raise KeyError while the error is being reported.
                label = column.get("name", column.get("titles"))
                errors.append("Error in %s: Column %s is required!\n" % (str(cell), label))
        # check primary key
        if pk_column_index_list:
            pk_value = concatenate_pk_value(row, pk_column_index_list)
            if pk_value in pk_value_set:
                errors.append("Error in %s: duplicated value: %s for primary key columns: %s\n" % (str(row), pk_value, pk_column_list))
            else:
                pk_value_set.add(pk_value)

    return (not errors, "".join(errors))
87 |
def concatenate_pk_value(row, pk_column_index_list):
    """Return the primary-key value of ``row`` as a tuple.

    The tuple holds, in cell order, the ``value`` of every cell whose
    ``column.number`` appears in ``pk_column_index_list``.
    """
    return tuple(
        cell.value
        for cell in row.cells
        if cell.column.number in pk_column_index_list
    )
95 |
def get_column_index(columns_in_schema, pk_column_list):
    """Return the 1-based positions of the primary-key columns.

    A position is included when the schema column at that position has a
    ``"name"`` contained in ``pk_column_list``.  Columns without a
    ``"name"`` can never be part of the key and are skipped instead of
    raising ``KeyError``.
    """
    return [
        position
        for position, column in enumerate(columns_in_schema, 1)
        if "name" in column and column["name"] in pk_column_list
    ]
103 |
104 |
def test_validate():
    """Manual smoke check: validate a hard-coded CSV/schema pair and print the outcome."""
    # Raw strings keep the Windows backslashes literal; in a normal string
    # "\W", "\F", "\M", "\C" and "\A" are invalid escape sequences.
    table_path = r"F:\WorkRecord\Feature\MCT\CsvSchema\AdyenAcquirerCode.csv"
    schema_path = r"F:\WorkRecord\Feature\MCT\CsvSchema\AdyenAcquirerCode.schema"
    (ret, error_message) = validate_file(table_path, schema_path)
    print("Is valid: %s\nError message: \n%s\n" % (ret, error_message))


if __name__ == '__main__':
    test_validate()
115 |
116 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
setup(
    name='pycsvw',
    version='0.1',
    packages=find_packages(),
    install_requires=[
        'language_tags',
        'rdflib',
        # imported by pycsvw/validator.py
        'simplejson',
        'uritemplate'
    ],
    url='https://github.com/sebneu/csvw-parser',
    license='',
    author='Sebastian Neumaier',
    author_email='sebastian.neumaier@wu.ac.at',
    description='Python implementation of the W3C CSV on the Web specification, cf. http://w3c.github.io/csvw/.'
)
18 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'sebastian'
2 |
--------------------------------------------------------------------------------
/test/csvw_implementation_report.py:
--------------------------------------------------------------------------------
1 | from csvw_validation_test_cases import implementation_report
2 |
3 | __author__ = 'sebastian'
4 | import rdflib
5 | from rdflib.namespace import FOAF
6 | EARL = rdflib.Namespace("http://www.w3.org/ns/earl#")
7 |
8 |
9 |
class ImplementationReport():
    """Builds an RDF graph holding the project description and test assertions.

    The graph is seeded from a Turtle DOAP file.  The first ``foaf:Person``
    found in it becomes the assertor and the first ``earl:TestSubject``
    becomes the subject of the assertions.
    """

    def __init__(self, doap_location='test/doap.ttl'):
        """Parse ``doap_location`` (Turtle) and pick the assertor and subject.

        ``assertor`` / ``subject`` are ``None`` when the file declares no
        matching resource, so the attributes always exist.
        """
        self.g = rdflib.Graph()
        self.g.parse(location=doap_location, format='turtle')
        self.assertor = next(iter(self.g.subjects(rdflib.RDF.type, FOAF.Person)), None)
        self.subject = next(iter(self.g.subjects(rdflib.RDF.type, EARL.TestSubject)), None)

    def run_validation_test(self):
        """Run the validation test-suite and add its assertions to the graph.

        Raises ``ValueError`` when the DOAP file did not provide both an
        assertor and a test subject.
        """
        if self.assertor is None or self.subject is None:
            raise ValueError('DOAP description must declare a foaf:Person and an earl:TestSubject')
        implementation_report(self.g, self.subject, self.assertor)

    def getResult(self):
        """Return the graph serialized as Turtle."""
        return self.g.serialize(format='turtle')
26 |
27 |
if __name__ == '__main__':
    # Build the report, echo it and store it as earl.ttl in the working directory.
    rep = ImplementationReport()
    rep.run_validation_test()
    res = rep.getResult()
    # Call form prints the same on Python 2 and is valid syntax on Python 3.
    print(res)
    with open('earl.ttl', 'w') as f:
        f.write(res)
35 |
--------------------------------------------------------------------------------
/test/csvw_json_test_cases.py:
--------------------------------------------------------------------------------
1 | import urlparse
2 | import traceback
3 | import unittest
4 | import json
5 | import os
6 | parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
7 | os.sys.path.insert(0,parentdir)
8 | from pycsvw import CSVW
9 | import urllib2
10 |
11 | MAX_TESTS = 5
12 | MANIFEST = 'http://w3c.github.io/csvw/tests/manifest-json.jsonld'
13 | BASE = 'http://w3c.github.io/csvw/tests/'
14 | TYPES = {
15 | 'csvt:ToJsonTest': True,
16 | 'csvt:ToJsonTestWithWarnings': True,
17 | 'csvt:NegativeJsonTest': False
18 | }
19 |
20 |
def get_manifest():
    """Download the manifest at ``MANIFEST`` and return it as parsed JSON."""
    response = urllib2.urlopen(MANIFEST)
    try:
        return json.loads(response.read())
    finally:
        # Release the connection even when the payload is not valid JSON.
        response.close()
24 |
25 |
class CSVWJSONTestCases(unittest.TestCase):
    """Empty container; test methods are attached with ``setattr`` at run time."""
28 |
29 |
def test_generator(csv_file, result_url, implicit, type, option):
    """Create a test method for one manifest entry of the JSON test-suite.

    The returned function builds a ``CSVW`` for ``csv_file`` (with the
    metadata URL from ``option`` if present).  ``TYPES[type]`` says whether
    that is expected to succeed.  On success the JSON produced by
    ``to_json()`` is compared with the document fetched from ``result_url``.
    ``implicit`` is accepted but not used.
    """
    def test(self):
        metadata = option['metadata'] if 'metadata' in option else None

        try:
            csvw = CSVW(csv_file, metadata_url=metadata)
        except Exception:
            # Construction failed, which is only acceptable for a negative
            # test; for a positive one show the traceback before failing.
            if TYPES[type]:
                traceback.print_exc()
            self.assertFalse(TYPES[type])
            return

        # Construction worked, so the entry has to be a positive test ...
        self.assertTrue(TYPES[type])
        # ... that yields metadata and names an expected result document.
        self.assertNotEqual(csvw.metadata, None)
        self.assertNotEqual(result_url, None)

        # Compare the generated JSON with the published expectation.
        expected = json.loads(urllib2.urlopen(result_url).read())
        self.assertEqual(csvw.to_json(), expected)

    return test
60 |
61 |
62 |
if __name__ == '__main__':
    manifest = get_manifest()
    for i, t in enumerate(manifest['entries']):
        # Stop before registering entry number MAX_TESTS so that exactly
        # MAX_TESTS tests are attached (the old check ran MAX_TESTS + 2).
        # A non-positive MAX_TESTS means "no limit".
        if 0 < MAX_TESTS <= i:
            break

        test_name = 'test ' + t['type'] + ': ' + t['name']
        csv_file = urlparse.urljoin(BASE, t['action'])

        result = None
        if 'result' in t:
            result = urlparse.urljoin(BASE, t['result'])

        implicit = [urlparse.urljoin(BASE, f) for f in t.get('implicit', [])]

        # Entries without options get an empty dict instead of a KeyError.
        option = t.setdefault('option', {})
        if 'metadata' in option:
            option['metadata'] = urlparse.urljoin(BASE, option['metadata'])

        test = test_generator(csv_file, result, implicit, t['type'], option)
        setattr(CSVWJSONTestCases, test_name, test)

    unittest.main()
89 |
--------------------------------------------------------------------------------
/test/csvw_validation_single_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import urlparse
3 | from csvw_validation_test_cases import get_manifest, BASE, test_generator, CSVWValidationTestCases, test_generator_metadata
4 |
5 | __author__ = 'sebastian'
6 |
if __name__ == '__main__':
    # On Python 2, input() evaluates what the user types (so "010" would
    # become 8 and arbitrary expressions would run); read plain text instead.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    test_no = read_line('Test No.: ')
    test_id = '#test' + str(test_no).strip().zfill(3)

    manifest = get_manifest()
    for i, t in enumerate(manifest['entries']):
        if not t['id'].endswith(test_id):
            continue

        test_name = ' '.join(['test', t['id'], t['type'], t['name']])
        action_url = urlparse.urljoin(BASE, t['action'])
        implicit = [urlparse.urljoin(BASE, f) for f in t.get('implicit', [])]

        if 'metadata' in t['option']:
            t['option']['metadata'] = urlparse.urljoin(BASE, t['option']['metadata'])

        # CSV actions are parsed as tables, JSON actions as metadata documents.
        if action_url.endswith('.csv'):
            test = test_generator(action_url, implicit, t['type'], t['option'])
            setattr(CSVWValidationTestCases, test_name, test)
        elif action_url.endswith('.json'):
            test = test_generator_metadata(action_url, implicit, t['type'], t['option'])
            setattr(CSVWValidationTestCases, test_name, test)
        break
    else:
        # Fail loudly instead of reporting "Ran 0 tests ... OK".
        raise SystemExit('No manifest entry found for ' + test_id)

    unittest.main()
33 |
--------------------------------------------------------------------------------
/test/csvw_validation_test_cases.py:
--------------------------------------------------------------------------------
1 | import urlparse
2 | import traceback
3 | import unittest
4 | import json
5 | from StringIO import StringIO
6 | import datetime
7 | import rdflib
8 | import os
9 | parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
10 | os.sys.path.insert(0,parentdir)
11 | from pycsvw import metadata
12 | from pycsvw.main import CSVW
13 | import urllib2
14 |
15 | MAX_TESTS = -1
16 | MANIFEST = 'http://w3c.github.io/csvw/tests/manifest-validation.jsonld'
17 | BASE = 'http://w3c.github.io/csvw/tests/'
18 | TYPES = {
19 | 'csvt:WarningValidationTest': True,
20 | 'csvt:PositiveValidationTest': True,
21 | 'csvt:NegativeValidationTest': False
22 | }
23 |
24 |
def get_manifest():
    """Download the manifest at ``MANIFEST`` and return it as parsed JSON."""
    response = urllib2.urlopen(MANIFEST)
    try:
        return json.loads(response.read())
    finally:
        # Release the connection even when the payload is not valid JSON.
        response.close()
28 |
29 |
class CSVWValidationTestCases(unittest.TestCase):
    """Empty container; test methods are attached with ``setattr`` at run time."""
32 |
33 |
def test_generator(csv_url, implicit, type, option):
    """Create a test method for a manifest entry whose action is a CSV file.

    The returned function builds a ``CSVW`` for ``csv_url`` (with the
    metadata URL from ``option`` if present).  ``TYPES[type]`` says whether
    that is expected to succeed.  ``implicit`` is accepted but not used.
    """
    def test(self):
        metadata_url = option.get('metadata')

        try:
            csvw = CSVW(url=csv_url, metadata_url=metadata_url)
        except Exception:
            # this should be a negative test
            if TYPES[type]:
                # Bare raise keeps the original traceback (``raise e``
                # resets it on Python 2) and matches the metadata variant.
                raise
            self.assertFalse(TYPES[type])
            return

        # if we get here this should be a positive test
        self.assertTrue(TYPES[type])

        # if we can parse it we should at least produce a table and some embedded metadata
        self.assertNotEqual(csvw.table, None)
        self.assertNotEqual(csvw.metadata, None)

        # The metadata must also serialize without raising.
        csvw.metadata.json()

    return test
58 |
def test_generator_metadata(metadata_url, implicit, type, option):
    """Create a test method for a manifest entry whose action is a metadata file.

    If ``implicit`` lists a ``.csv`` URL, the first one is parsed together
    with ``metadata_url`` through ``CSVW``.  Otherwise the metadata document
    is downloaded and passed to ``metadata.normalize`` on its own.
    ``TYPES[type]`` says whether processing is expected to succeed.
    ``option`` is accepted but not used.
    """
    def test(self):
        csv_url = next(
            (url for url in implicit or [] if url.endswith('.csv')),
            None)

        if csv_url:
            try:
                csvw = CSVW(url=csv_url, metadata_url=metadata_url)
            except Exception:
                # this should be a negative test
                if TYPES[type]:
                    raise
                self.assertFalse(TYPES[type])
                return
            self.assertTrue(TYPES[type])
            self.assertNotEqual(csvw.table, None)
            self.assertNotEqual(csvw.metadata, None)

            # The metadata must also serialize without raising.
            csvw.metadata.json()
        else:
            try:
                url_resp = urllib2.urlopen(metadata_url)
                try:
                    meta = json.loads(url_resp.read())
                finally:
                    url_resp.close()
                meta_model = metadata.normalize(meta)
            except Exception:
                if TYPES[type]:
                    # Bare raise keeps the original traceback, as in the
                    # CSV branch above.
                    raise
                self.assertFalse(TYPES[type])
                return
            self.assertTrue(TYPES[type])

            self.assertNotEqual(meta_model, None)
            # The normalized model must also serialize without raising.
            meta_model.json()

    return test
101 |
102 |
103 |
104 |
def get_test_method(i, t):
    """Return the test function for manifest entry ``t``, or ``None``.

    Relative URLs in the entry are resolved against ``BASE``; the resolved
    metadata URL is written back into ``t['option']``.  A ``.csv`` action
    yields a ``test_generator`` test, a ``.json`` action a
    ``test_generator_metadata`` test, and anything else ``None``.
    ``i`` is accepted but not used.
    """
    action_url = urlparse.urljoin(BASE, t['action'])

    implicit = []
    if 'implicit' in t:
        implicit = [urlparse.urljoin(BASE, f) for f in t['implicit']]

    options = t['option']
    if 'metadata' in options:
        options['metadata'] = urlparse.urljoin(BASE, options['metadata'])

    if action_url.endswith('.csv'):
        return test_generator(action_url, implicit, t['type'], options)
    if action_url.endswith('.json'):
        return test_generator_metadata(action_url, implicit, t['type'], options)
    return None
122 |
123 |
def implementation_report(graph, subject, assertor):
    """Run every manifest entry and record an EARL assertion for it in ``graph``.

    For each entry an ``earl:Assertion`` (asserted by ``assertor`` about
    ``subject``) with an ``earl:TestResult`` is added.  The outcome is
    ``earl:passed`` only when the test ran without failures and without
    errors, ``earl:failed`` otherwise, and ``earl:untested`` when no test
    method could be built for the entry.
    """
    from rdflib.namespace import XSD, DC
    EARL = rdflib.Namespace("http://www.w3.org/ns/earl#")

    validation_html = "http://www.w3.org/2013/csvw/tests/"
    # Every test is attached under this one temporary attribute name.
    test_name = 'tmp'
    manifest = get_manifest()
    for i, t in enumerate(manifest['entries']):
        # add the properties for a test case
        assertion = rdflib.BNode()
        graph.add((assertion, rdflib.RDF.type, EARL.Assertion))
        graph.add((assertion, EARL.assertedBy, assertor))
        graph.add((assertion, EARL.subject, subject))
        graph.add((assertion, EARL.test, rdflib.URIRef(validation_html + t['id'])))
        result = rdflib.BNode()
        graph.add((assertion, EARL.result, result))
        graph.add((result, rdflib.RDF.type, EARL.TestResult))
        graph.add((result, EARL.mode, EARL.automatic))

        # TODO edit this hack...
        # run test case
        test = get_test_method(i, t)
        if test is None:
            # Neither a .csv nor a .json action: there is nothing to run.
            outcome = EARL.untested
        else:
            setattr(CSVWValidationTestCases, test_name, test)
            try:
                suite = unittest.TestSuite()
                suite.addTest(CSVWValidationTestCases(test_name))
                test_result = unittest.TextTestRunner().run(suite)
            finally:
                # Always detach the temporary method, also when the run raises.
                delattr(CSVWValidationTestCases, test_name)

            # wasSuccessful() is False for errors as well as failures; a
            # test that raised must not be reported as passed.
            if test_result.wasSuccessful():
                outcome = EARL.passed
            else:
                outcome = EARL.failed
        graph.add((result, EARL.outcome, outcome))

        # add timestamp; isoformat() yields a date-time, so type it as xsd:dateTime
        now = datetime.datetime.now().isoformat()
        graph.add((result, DC.date, rdflib.Literal(now, datatype=XSD.dateTime)))
165 |
166 |
if __name__ == '__main__':
    manifest = get_manifest()
    for i, t in enumerate(manifest['entries']):
        # Stop before handling entry number MAX_TESTS so that at most
        # MAX_TESTS entries are considered (the old check allowed
        # MAX_TESTS + 2).  A non-positive MAX_TESTS means "no limit".
        if 0 < MAX_TESTS <= i:
            break
        test_name = ' '.join(['test', t['id'], t['type'], t['name']])
        meth = get_test_method(i, t)
        if meth:
            setattr(CSVWValidationTestCases, test_name, meth)

    unittest.main()
178 |
--------------------------------------------------------------------------------
/test/doap.ttl:
--------------------------------------------------------------------------------
1 | @prefix rdf: .
2 | @prefix rdfs: .
3 | @prefix dc: .
4 | @prefix earl: .
5 | @prefix foaf: .
6 | @prefix doap: .
7 |
8 | a doap:Project, earl:TestSubject, earl:Software ;
9 | doap:name "pycsvw" ;
10 | doap:description "Python implementation of the W3C CSV on the Web specification, cf. http://w3c.github.io/csvw/" ;
11 | doap:homepage ;
12 | doap:programming-language "Python" ;
13 | doap:implements ,
14 | ;
15 | doap:download-page ;
16 | doap:developer _:sebneu ;
17 | doap:maintainer _:sebneu ;
18 | doap:documenter _:sebneu ;
19 | foaf:maker _:sebneu ;
20 | dc:title "pycsvw" ;
21 | dc:creator _:sebneu .
22 |
23 | _:sebneu a foaf:Person, earl:Assertor;
24 | foaf:name "Sebastian Neumaier";
25 | foaf:homepage ;
26 | foaf:title "Implementor".
--------------------------------------------------------------------------------
/test/errors_10-12-15.txt:
--------------------------------------------------------------------------------
1 | ======================================================================
2 | ERROR: test manifest-validation#test046 csvt:WarningValidationTest invalid dataype (__main__.CSVWValidationTestCases)
3 | ----------------------------------------------------------------------
4 | Traceback (most recent call last):
5 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 64, in test
6 | csvw = CSVW(url=csv_url, metadata_url=metadata_url)
7 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/main.py", line 50, in __init__
8 | self.metadata = metadata.merge(sources)
9 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/metadata.py", line 1064, in merge
10 | raise ValidationException('validation failed for metadata: ' + str(s))
11 | ValidationException: validation failed for metadata: {u'datatype': u'anySimpleType', u'@context': u'http://www.w3.org/ns/csvw', u'tables': [{u'url': u'test046.csv', u'tableSchema': {u'columns': [{u'titles': u'null'}, {u'titles': u'lang'}, {u'titles': u'textDirection'}, {u'titles': u'separator'}, {u'titles': u'ordered'}, {u'titles': u'default'}, {u'titles': u'datatype'}, {u'titles': u'aboutUrl'}, {u'titles': u'propertyUrl'}, {u'titles': u'valueUrl'}]}}]}
12 |
13 | ======================================================================
14 | ERROR: test manifest-validation#test096 csvt:WarningValidationTest inconsistent array values: columns (__main__.CSVWValidationTestCases)
15 | ----------------------------------------------------------------------
16 | Traceback (most recent call last):
17 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 64, in test
18 | csvw = CSVW(url=csv_url, metadata_url=metadata_url)
19 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/main.py", line 50, in __init__
20 | self.metadata = metadata.merge(sources)
21 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/metadata.py", line 1064, in merge
22 | raise ValidationException('validation failed for metadata: ' + str(s))
23 | ValidationException: validation failed for metadata: {u'url': u'tree-ops.csv', u'rdfs:comment': u'last column is datatype, not column', u'tableSchema': {u'columns': [{u'titles': u'GID', u'name': u'GID'}, {u'titles': u'On Street', u'name': u'on_street'}, {u'titles': u'Species', u'name': u'species'}, {u'titles': u'Trim Cycle', u'name': u'trim_cycle'}, {u'titles': u'Inventory Date', u'name': u'inventory_date'}, 1]}}
24 |
25 | ======================================================================
26 | ERROR: test manifest-validation#test150 csvt:WarningValidationTest non-builtin datatype (datatype value) (__main__.CSVWValidationTestCases)
27 | ----------------------------------------------------------------------
28 | Traceback (most recent call last):
29 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 64, in test
30 | csvw = CSVW(url=csv_url, metadata_url=metadata_url)
31 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/main.py", line 50, in __init__
32 | self.metadata = metadata.merge(sources)
33 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/metadata.py", line 1064, in merge
34 | raise ValidationException('validation failed for metadata: ' + str(s))
35 | ValidationException: validation failed for metadata: {u'url': u'tree-ops.csv', u'rdfs:comment': u'If the value of this property is a string, it MUST be one of the built-in datatypes', u'tableSchema': {u'columns': [{u'datatype': u'foo', u'titles': u'GID'}, {u'titles': u'On Street'}, {u'titles': u'Species'}, {u'titles': u'Trim Cycle'}, {u'titles': u'Inventory Date'}]}, u'rdfs:label': u'non-builtin datatype (datatype value)'}
36 |
37 | ======================================================================
38 | ERROR: test manifest-validation#test238 csvt:WarningValidationTest datatype value an absolute URL (__main__.CSVWValidationTestCases)
39 | ----------------------------------------------------------------------
40 | Traceback (most recent call last):
41 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 64, in test
42 | csvw = CSVW(url=csv_url, metadata_url=metadata_url)
43 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/main.py", line 50, in __init__
44 | self.metadata = metadata.merge(sources)
45 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/metadata.py", line 1064, in merge
46 | raise ValidationException('validation failed for metadata: ' + str(s))
47 | ValidationException: validation failed for metadata: {u'url': u'test238.csv', u'rdfs:comment': u'If the value of this property is a string, it MUST be one of the built-in datatypes defined in section 5.11.1 Built-in Datatypes or an absolute URL.', u'tableSchema': {u'columns': [{u'datatype': u'http://example.org/datatype', u'titles': u'string'}]}, u'rdfs:label': u'datatype value an absolute URL that does not resolve'}
48 |
49 | ======================================================================
50 | FAIL: test manifest-validation#test081 csvt:NegativeValidationTest invalid dialect @id (__main__.CSVWValidationTestCases)
51 | ----------------------------------------------------------------------
52 | Traceback (most recent call last):
53 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
54 | self.assertTrue(TYPES[type])
55 | AssertionError: False is not true
56 |
57 | ======================================================================
58 | FAIL: test manifest-validation#test082 csvt:NegativeValidationTest invalid template @id (__main__.CSVWValidationTestCases)
59 | ----------------------------------------------------------------------
60 | Traceback (most recent call last):
61 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
62 | self.assertTrue(TYPES[type])
63 | AssertionError: False is not true
64 |
65 | ======================================================================
66 | FAIL: test manifest-validation#test087 csvt:NegativeValidationTest invalid dialect @type (__main__.CSVWValidationTestCases)
67 | ----------------------------------------------------------------------
68 | Traceback (most recent call last):
69 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
70 | self.assertTrue(TYPES[type])
71 | AssertionError: False is not true
72 |
73 | ======================================================================
74 | FAIL: test manifest-validation#test088 csvt:NegativeValidationTest invalid transformation @type (__main__.CSVWValidationTestCases)
75 | ----------------------------------------------------------------------
76 | Traceback (most recent call last):
77 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
78 | self.assertTrue(TYPES[type])
79 | AssertionError: False is not true
80 |
81 | ======================================================================
82 | FAIL: test manifest-validation#test103 csvt:NegativeValidationTest inconsistent link values: url (__main__.CSVWValidationTestCases)
83 | ----------------------------------------------------------------------
84 | Traceback (most recent call last):
85 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
86 | self.assertTrue(TYPES[type])
87 | AssertionError: False is not true
88 |
89 | ======================================================================
90 | FAIL: test manifest-validation#test104 csvt:NegativeValidationTest invalid columnReference (__main__.CSVWValidationTestCases)
91 | ----------------------------------------------------------------------
92 | Traceback (most recent call last):
93 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
94 | self.assertTrue(TYPES[type])
95 | AssertionError: False is not true
96 |
97 | ======================================================================
98 | FAIL: test manifest-validation#test108 csvt:NegativeValidationTest invalid reference (__main__.CSVWValidationTestCases)
99 | ----------------------------------------------------------------------
100 | Traceback (most recent call last):
101 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
102 | self.assertTrue(TYPES[type])
103 | AssertionError: False is not true
104 |
105 | ======================================================================
106 | FAIL: test manifest-validation#test124 csvt:NegativeValidationTest metadata with columns not matching csv titles (__main__.CSVWValidationTestCases)
107 | ----------------------------------------------------------------------
108 | Traceback (most recent call last):
109 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 42, in test
110 | self.assertTrue(TYPES[type])
111 | AssertionError: False is not true
112 |
113 | ======================================================================
114 | FAIL: test manifest-validation#test125 csvt:NegativeValidationTest required column with empty cell (__main__.CSVWValidationTestCases)
115 | ----------------------------------------------------------------------
116 | Traceback (most recent call last):
117 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
118 | self.assertTrue(TYPES[type])
119 | AssertionError: False is not true
120 |
121 | ======================================================================
122 | FAIL: test manifest-validation#test126 csvt:NegativeValidationTest required column with cell matching null (__main__.CSVWValidationTestCases)
123 | ----------------------------------------------------------------------
124 | Traceback (most recent call last):
125 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
126 | self.assertTrue(TYPES[type])
127 | AssertionError: False is not true
128 |
129 | ======================================================================
130 | FAIL: test manifest-validation#test127 csvt:NegativeValidationTest incompatible table (__main__.CSVWValidationTestCases)
131 | ----------------------------------------------------------------------
132 | Traceback (most recent call last):
133 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
134 | self.assertTrue(TYPES[type])
135 | AssertionError: False is not true
136 |
137 | ======================================================================
138 | FAIL: test manifest-validation#test133 csvt:NegativeValidationTest virtual before non-virtual (__main__.CSVWValidationTestCases)
139 | ----------------------------------------------------------------------
140 | Traceback (most recent call last):
141 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
142 | self.assertTrue(TYPES[type])
143 | AssertionError: False is not true
144 |
145 | ======================================================================
146 | FAIL: test manifest-validation#test134 csvt:NegativeValidationTest context in common property (__main__.CSVWValidationTestCases)
147 | ----------------------------------------------------------------------
148 | Traceback (most recent call last):
149 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
150 | self.assertTrue(TYPES[type])
151 | AssertionError: False is not true
152 |
153 | ======================================================================
154 | FAIL: test manifest-validation#test135 csvt:NegativeValidationTest @list value (__main__.CSVWValidationTestCases)
155 | ----------------------------------------------------------------------
156 | Traceback (most recent call last):
157 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
158 | self.assertTrue(TYPES[type])
159 | AssertionError: False is not true
160 |
161 | ======================================================================
162 | FAIL: test manifest-validation#test136 csvt:NegativeValidationTest @set value (__main__.CSVWValidationTestCases)
163 | ----------------------------------------------------------------------
164 | Traceback (most recent call last):
165 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
166 | self.assertTrue(TYPES[type])
167 | AssertionError: False is not true
168 |
169 | ======================================================================
170 | FAIL: test manifest-validation#test137 csvt:NegativeValidationTest @type out of range (as datatype) (__main__.CSVWValidationTestCases)
171 | ----------------------------------------------------------------------
172 | Traceback (most recent call last):
173 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
174 | self.assertTrue(TYPES[type])
175 | AssertionError: False is not true
176 |
177 | ======================================================================
178 | FAIL: test manifest-validation#test138 csvt:NegativeValidationTest @type out of range (as node type) (__main__.CSVWValidationTestCases)
179 | ----------------------------------------------------------------------
180 | Traceback (most recent call last):
181 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
182 | self.assertTrue(TYPES[type])
183 | AssertionError: False is not true
184 |
185 | ======================================================================
186 | FAIL: test manifest-validation#test139 csvt:NegativeValidationTest @type out of range (as node type) - string (__main__.CSVWValidationTestCases)
187 | ----------------------------------------------------------------------
188 | Traceback (most recent call last):
189 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
190 | self.assertTrue(TYPES[type])
191 | AssertionError: False is not true
192 |
193 | ======================================================================
194 | FAIL: test manifest-validation#test140 csvt:NegativeValidationTest @type out of range (as node type) - integer (__main__.CSVWValidationTestCases)
195 | ----------------------------------------------------------------------
196 | Traceback (most recent call last):
197 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
198 | self.assertTrue(TYPES[type])
199 | AssertionError: False is not true
200 |
201 | ======================================================================
202 | FAIL: test manifest-validation#test141 csvt:NegativeValidationTest @id out of range (as node type) - bnode (__main__.CSVWValidationTestCases)
203 | ----------------------------------------------------------------------
204 | Traceback (most recent call last):
205 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
206 | self.assertTrue(TYPES[type])
207 | AssertionError: False is not true
208 |
209 | ======================================================================
210 | FAIL: test manifest-validation#test142 csvt:NegativeValidationTest @value with @language and @type (__main__.CSVWValidationTestCases)
211 | ----------------------------------------------------------------------
212 | Traceback (most recent call last):
213 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
214 | self.assertTrue(TYPES[type])
215 | AssertionError: False is not true
216 |
217 | ======================================================================
218 | FAIL: test manifest-validation#test143 csvt:NegativeValidationTest @value with extra properties (__main__.CSVWValidationTestCases)
219 | ----------------------------------------------------------------------
220 | Traceback (most recent call last):
221 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
222 | self.assertTrue(TYPES[type])
223 | AssertionError: False is not true
224 |
225 | ======================================================================
226 | FAIL: test manifest-validation#test144 csvt:NegativeValidationTest @language outside of @value (__main__.CSVWValidationTestCases)
227 | ----------------------------------------------------------------------
228 | Traceback (most recent call last):
229 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
230 | self.assertTrue(TYPES[type])
231 | AssertionError: False is not true
232 |
233 | ======================================================================
234 | FAIL: test manifest-validation#test145 csvt:NegativeValidationTest @value with invalid @language (__main__.CSVWValidationTestCases)
235 | ----------------------------------------------------------------------
236 | Traceback (most recent call last):
237 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
238 | self.assertTrue(TYPES[type])
239 | AssertionError: False is not true
240 |
241 | ======================================================================
242 | FAIL: test manifest-validation#test146 csvt:NegativeValidationTest Invalid faux-keyword (__main__.CSVWValidationTestCases)
243 | ----------------------------------------------------------------------
244 | Traceback (most recent call last):
245 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
246 | self.assertTrue(TYPES[type])
247 | AssertionError: False is not true
248 |
249 | ======================================================================
250 | FAIL: test manifest-validation#test147 csvt:NegativeValidationTest title incompatible with title on case (__main__.CSVWValidationTestCases)
251 | ----------------------------------------------------------------------
252 | Traceback (most recent call last):
253 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
254 | self.assertTrue(TYPES[type])
255 | AssertionError: False is not true
256 |
257 | ======================================================================
258 | FAIL: test manifest-validation#test148 csvt:NegativeValidationTest title incompatible with title on language (__main__.CSVWValidationTestCases)
259 | ----------------------------------------------------------------------
260 | Traceback (most recent call last):
261 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
262 | self.assertTrue(TYPES[type])
263 | AssertionError: False is not true
264 |
265 | ======================================================================
266 | FAIL: test manifest-validation#test154 csvt:NegativeValidationTest string format (value not matching format) (__main__.CSVWValidationTestCases)
267 | ----------------------------------------------------------------------
268 | Traceback (most recent call last):
269 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
270 | self.assertTrue(TYPES[type])
271 | AssertionError: False is not true
272 |
273 | ======================================================================
274 | FAIL: test manifest-validation#test157 csvt:NegativeValidationTest number format (value not matching format) (__main__.CSVWValidationTestCases)
275 | ----------------------------------------------------------------------
276 | Traceback (most recent call last):
277 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
278 | self.assertTrue(TYPES[type])
279 | AssertionError: False is not true
280 |
281 | ======================================================================
282 | FAIL: test manifest-validation#test160 csvt:NegativeValidationTest number format (not matching values with pattern) (__main__.CSVWValidationTestCases)
283 | ----------------------------------------------------------------------
284 | Traceback (most recent call last):
285 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
286 | self.assertTrue(TYPES[type])
287 | AssertionError: False is not true
288 |
289 | ======================================================================
290 | FAIL: test manifest-validation#test161 csvt:NegativeValidationTest number format (not matching values without pattern) (__main__.CSVWValidationTestCases)
291 | ----------------------------------------------------------------------
292 | Traceback (most recent call last):
293 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
294 | self.assertTrue(TYPES[type])
295 | AssertionError: False is not true
296 |
297 | ======================================================================
298 | FAIL: test manifest-validation#test162 csvt:NegativeValidationTest numeric format (consecutive groupChar) (__main__.CSVWValidationTestCases)
299 | ----------------------------------------------------------------------
300 | Traceback (most recent call last):
301 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
302 | self.assertTrue(TYPES[type])
303 | AssertionError: False is not true
304 |
305 | ======================================================================
306 | FAIL: test manifest-validation#test163 csvt:NegativeValidationTest integer datatype with decimalChar (__main__.CSVWValidationTestCases)
307 | ----------------------------------------------------------------------
308 | Traceback (most recent call last):
309 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
310 | self.assertTrue(TYPES[type])
311 | AssertionError: False is not true
312 |
313 | ======================================================================
314 | FAIL: test manifest-validation#test164 csvt:NegativeValidationTest decimal datatype with exponent (__main__.CSVWValidationTestCases)
315 | ----------------------------------------------------------------------
316 | Traceback (most recent call last):
317 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
318 | self.assertTrue(TYPES[type])
319 | AssertionError: False is not true
320 |
321 | ======================================================================
322 | FAIL: test manifest-validation#test165 csvt:NegativeValidationTest decimal type with NaN (__main__.CSVWValidationTestCases)
323 | ----------------------------------------------------------------------
324 | Traceback (most recent call last):
325 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
326 | self.assertTrue(TYPES[type])
327 | AssertionError: False is not true
328 |
329 | ======================================================================
330 | FAIL: test manifest-validation#test166 csvt:NegativeValidationTest decimal type with INF (__main__.CSVWValidationTestCases)
331 | ----------------------------------------------------------------------
332 | Traceback (most recent call last):
333 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
334 | self.assertTrue(TYPES[type])
335 | AssertionError: False is not true
336 |
337 | ======================================================================
338 | FAIL: test manifest-validation#test167 csvt:NegativeValidationTest decimal type with -INF (__main__.CSVWValidationTestCases)
339 | ----------------------------------------------------------------------
340 | Traceback (most recent call last):
341 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
342 | self.assertTrue(TYPES[type])
343 | AssertionError: False is not true
344 |
345 | ======================================================================
346 | FAIL: test manifest-validation#test169 csvt:NegativeValidationTest invalid decimal (__main__.CSVWValidationTestCases)
347 | ----------------------------------------------------------------------
348 | Traceback (most recent call last):
349 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
350 | self.assertTrue(TYPES[type])
351 | AssertionError: False is not true
352 |
353 | ======================================================================
354 | FAIL: test manifest-validation#test172 csvt:NegativeValidationTest invalid byte (__main__.CSVWValidationTestCases)
355 | ----------------------------------------------------------------------
356 | Traceback (most recent call last):
357 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
358 | self.assertTrue(TYPES[type])
359 | AssertionError: False is not true
360 |
361 | ======================================================================
362 | FAIL: test manifest-validation#test173 csvt:NegativeValidationTest invald unsignedLong (__main__.CSVWValidationTestCases)
363 | ----------------------------------------------------------------------
364 | Traceback (most recent call last):
365 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
366 | self.assertTrue(TYPES[type])
367 | AssertionError: False is not true
368 |
369 | ======================================================================
370 | FAIL: test manifest-validation#test174 csvt:NegativeValidationTest invalid unsignedShort (__main__.CSVWValidationTestCases)
371 | ----------------------------------------------------------------------
372 | Traceback (most recent call last):
373 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
374 | self.assertTrue(TYPES[type])
375 | AssertionError: False is not true
376 |
377 | ======================================================================
378 | FAIL: test manifest-validation#test175 csvt:NegativeValidationTest invalid unsignedByte (__main__.CSVWValidationTestCases)
379 | ----------------------------------------------------------------------
380 | Traceback (most recent call last):
381 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
382 | self.assertTrue(TYPES[type])
383 | AssertionError: False is not true
384 |
385 | ======================================================================
386 | FAIL: test manifest-validation#test176 csvt:NegativeValidationTest invalid positiveInteger (__main__.CSVWValidationTestCases)
387 | ----------------------------------------------------------------------
388 | Traceback (most recent call last):
389 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
390 | self.assertTrue(TYPES[type])
391 | AssertionError: False is not true
392 |
393 | ======================================================================
394 | FAIL: test manifest-validation#test177 csvt:NegativeValidationTest invalid negativeInteger (__main__.CSVWValidationTestCases)
395 | ----------------------------------------------------------------------
396 | Traceback (most recent call last):
397 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
398 | self.assertTrue(TYPES[type])
399 | AssertionError: False is not true
400 |
401 | ======================================================================
402 | FAIL: test manifest-validation#test178 csvt:NegativeValidationTest invalid nonPositiveInteger (__main__.CSVWValidationTestCases)
403 | ----------------------------------------------------------------------
404 | Traceback (most recent call last):
405 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
406 | self.assertTrue(TYPES[type])
407 | AssertionError: False is not true
408 |
409 | ======================================================================
410 | FAIL: test manifest-validation#test179 csvt:NegativeValidationTest invalid nonNegativeInteger (__main__.CSVWValidationTestCases)
411 | ----------------------------------------------------------------------
412 | Traceback (most recent call last):
413 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
414 | self.assertTrue(TYPES[type])
415 | AssertionError: False is not true
416 |
417 | ======================================================================
418 | FAIL: test manifest-validation#test180 csvt:NegativeValidationTest invalid double (__main__.CSVWValidationTestCases)
419 | ----------------------------------------------------------------------
420 | Traceback (most recent call last):
421 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
422 | self.assertTrue(TYPES[type])
423 | AssertionError: False is not true
424 |
425 | ======================================================================
426 | FAIL: test manifest-validation#test181 csvt:NegativeValidationTest invalid number (__main__.CSVWValidationTestCases)
427 | ----------------------------------------------------------------------
428 | Traceback (most recent call last):
429 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
430 | self.assertTrue(TYPES[type])
431 | AssertionError: False is not true
432 |
433 | ======================================================================
434 | FAIL: test manifest-validation#test182 csvt:NegativeValidationTest invalid float (__main__.CSVWValidationTestCases)
435 | ----------------------------------------------------------------------
436 | Traceback (most recent call last):
437 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
438 | self.assertTrue(TYPES[type])
439 | AssertionError: False is not true
440 |
441 | ======================================================================
442 | FAIL: test manifest-validation#test185 csvt:NegativeValidationTest boolean format (value not matching format) (__main__.CSVWValidationTestCases)
443 | ----------------------------------------------------------------------
444 | Traceback (most recent call last):
445 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
446 | self.assertTrue(TYPES[type])
447 | AssertionError: False is not true
448 |
449 | ======================================================================
450 | FAIL: test manifest-validation#test186 csvt:NegativeValidationTest boolean format (not matching datatype) (__main__.CSVWValidationTestCases)
451 | ----------------------------------------------------------------------
452 | Traceback (most recent call last):
453 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
454 | self.assertTrue(TYPES[type])
455 | AssertionError: False is not true
456 |
457 | ======================================================================
458 | FAIL: test manifest-validation#test191 csvt:NegativeValidationTest date format (bad format string) (__main__.CSVWValidationTestCases)
459 | ----------------------------------------------------------------------
460 | Traceback (most recent call last):
461 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
462 | self.assertTrue(TYPES[type])
463 | AssertionError: False is not true
464 |
465 | ======================================================================
466 | FAIL: test manifest-validation#test192 csvt:NegativeValidationTest date format (value not matching format) (__main__.CSVWValidationTestCases)
467 | ----------------------------------------------------------------------
468 | Traceback (most recent call last):
469 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
470 | self.assertTrue(TYPES[type])
471 | AssertionError: False is not true
472 |
473 | ======================================================================
474 | FAIL: test manifest-validation#test194 csvt:NegativeValidationTest duration format (value not matching format) (__main__.CSVWValidationTestCases)
475 | ----------------------------------------------------------------------
476 | Traceback (most recent call last):
477 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
478 | self.assertTrue(TYPES[type])
479 | AssertionError: False is not true
480 |
481 | ======================================================================
482 | FAIL: test manifest-validation#test196 csvt:NegativeValidationTest values with wrong length (__main__.CSVWValidationTestCases)
483 | ----------------------------------------------------------------------
484 | Traceback (most recent call last):
485 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
486 | self.assertTrue(TYPES[type])
487 | AssertionError: False is not true
488 |
489 | ======================================================================
490 | FAIL: test manifest-validation#test197 csvt:NegativeValidationTest values with wrong maxLength (__main__.CSVWValidationTestCases)
491 | ----------------------------------------------------------------------
492 | Traceback (most recent call last):
493 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
494 | self.assertTrue(TYPES[type])
495 | AssertionError: False is not true
496 |
497 | ======================================================================
498 | FAIL: test manifest-validation#test198 csvt:NegativeValidationTest values with wrong minLength (__main__.CSVWValidationTestCases)
499 | ----------------------------------------------------------------------
500 | Traceback (most recent call last):
501 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
502 | self.assertTrue(TYPES[type])
503 | AssertionError: False is not true
504 |
505 | ======================================================================
506 | FAIL: test manifest-validation#test199 csvt:NegativeValidationTest length < minLength (__main__.CSVWValidationTestCases)
507 | ----------------------------------------------------------------------
508 | Traceback (most recent call last):
509 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
510 | self.assertTrue(TYPES[type])
511 | AssertionError: False is not true
512 |
513 | ======================================================================
514 | FAIL: test manifest-validation#test200 csvt:NegativeValidationTest length > maxLength (__main__.CSVWValidationTestCases)
515 | ----------------------------------------------------------------------
516 | Traceback (most recent call last):
517 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
518 | self.assertTrue(TYPES[type])
519 | AssertionError: False is not true
520 |
521 | ======================================================================
522 | FAIL: test manifest-validation#test201 csvt:NegativeValidationTest length on date (__main__.CSVWValidationTestCases)
523 | ----------------------------------------------------------------------
524 | Traceback (most recent call last):
525 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
526 | self.assertTrue(TYPES[type])
527 | AssertionError: False is not true
528 |
529 | ======================================================================
530 | FAIL: test manifest-validation#test203 csvt:NegativeValidationTest float value constraint not matching minimum (__main__.CSVWValidationTestCases)
531 | ----------------------------------------------------------------------
532 | Traceback (most recent call last):
533 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
534 | self.assertTrue(TYPES[type])
535 | AssertionError: False is not true
536 |
537 | ======================================================================
538 | FAIL: test manifest-validation#test204 csvt:NegativeValidationTest float value constraint not matching maximum (__main__.CSVWValidationTestCases)
539 | ----------------------------------------------------------------------
540 | Traceback (most recent call last):
541 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
542 | self.assertTrue(TYPES[type])
543 | AssertionError: False is not true
544 |
545 | ======================================================================
546 | FAIL: test manifest-validation#test205 csvt:NegativeValidationTest float value constraint not matching minInclusive (__main__.CSVWValidationTestCases)
547 | ----------------------------------------------------------------------
548 | Traceback (most recent call last):
549 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
550 | self.assertTrue(TYPES[type])
551 | AssertionError: False is not true
552 |
553 | ======================================================================
554 | FAIL: test manifest-validation#test206 csvt:NegativeValidationTest float value constraint not matching minExclusive (__main__.CSVWValidationTestCases)
555 | ----------------------------------------------------------------------
556 | Traceback (most recent call last):
557 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
558 | self.assertTrue(TYPES[type])
559 | AssertionError: False is not true
560 |
561 | ======================================================================
562 | FAIL: test manifest-validation#test207 csvt:NegativeValidationTest float value constraint not matching maxInclusive (__main__.CSVWValidationTestCases)
563 | ----------------------------------------------------------------------
564 | Traceback (most recent call last):
565 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
566 | self.assertTrue(TYPES[type])
567 | AssertionError: False is not true
568 |
569 | ======================================================================
570 | FAIL: test manifest-validation#test208 csvt:NegativeValidationTest float value constraint not matching maxExclusive (__main__.CSVWValidationTestCases)
571 | ----------------------------------------------------------------------
572 | Traceback (most recent call last):
573 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
574 | self.assertTrue(TYPES[type])
575 | AssertionError: False is not true
576 |
577 | ======================================================================
578 | FAIL: test manifest-validation#test210 csvt:NegativeValidationTest date value constraint not matching minimum (__main__.CSVWValidationTestCases)
579 | ----------------------------------------------------------------------
580 | Traceback (most recent call last):
581 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
582 | self.assertTrue(TYPES[type])
583 | AssertionError: False is not true
584 |
585 | ======================================================================
586 | FAIL: test manifest-validation#test211 csvt:NegativeValidationTest date value constraint not matching maximum (__main__.CSVWValidationTestCases)
587 | ----------------------------------------------------------------------
588 | Traceback (most recent call last):
589 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
590 | self.assertTrue(TYPES[type])
591 | AssertionError: False is not true
592 |
593 | ======================================================================
594 | FAIL: test manifest-validation#test212 csvt:NegativeValidationTest date value constraint not matching minInclusive (__main__.CSVWValidationTestCases)
595 | ----------------------------------------------------------------------
596 | Traceback (most recent call last):
597 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
598 | self.assertTrue(TYPES[type])
599 | AssertionError: False is not true
600 |
601 | ======================================================================
602 | FAIL: test manifest-validation#test213 csvt:NegativeValidationTest date value constraint not matching minExclusive (__main__.CSVWValidationTestCases)
603 | ----------------------------------------------------------------------
604 | Traceback (most recent call last):
605 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
606 | self.assertTrue(TYPES[type])
607 | AssertionError: False is not true
608 |
609 | ======================================================================
610 | FAIL: test manifest-validation#test214 csvt:NegativeValidationTest date value constraint not matching maxInclusive (__main__.CSVWValidationTestCases)
611 | ----------------------------------------------------------------------
612 | Traceback (most recent call last):
613 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
614 | self.assertTrue(TYPES[type])
615 | AssertionError: False is not true
616 |
617 | ======================================================================
618 | FAIL: test manifest-validation#test215 csvt:NegativeValidationTest date value constraint not matching maxExclusive (__main__.CSVWValidationTestCases)
619 | ----------------------------------------------------------------------
620 | Traceback (most recent call last):
621 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
622 | self.assertTrue(TYPES[type])
623 | AssertionError: False is not true
624 |
625 | ======================================================================
626 | FAIL: test manifest-validation#test216 csvt:NegativeValidationTest minInclusive and minExclusive (__main__.CSVWValidationTestCases)
627 | ----------------------------------------------------------------------
628 | Traceback (most recent call last):
629 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
630 | self.assertTrue(TYPES[type])
631 | AssertionError: False is not true
632 |
633 | ======================================================================
634 | FAIL: test manifest-validation#test217 csvt:NegativeValidationTest maxInclusive and maxExclusive (__main__.CSVWValidationTestCases)
635 | ----------------------------------------------------------------------
636 | Traceback (most recent call last):
637 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
638 | self.assertTrue(TYPES[type])
639 | AssertionError: False is not true
640 |
641 | ======================================================================
642 | FAIL: test manifest-validation#test218 csvt:NegativeValidationTest maxInclusive < minInclusive (__main__.CSVWValidationTestCases)
643 | ----------------------------------------------------------------------
644 | Traceback (most recent call last):
645 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
646 | self.assertTrue(TYPES[type])
647 | AssertionError: False is not true
648 |
649 | ======================================================================
650 | FAIL: test manifest-validation#test219 csvt:NegativeValidationTest maxExclusive = minInclusive (__main__.CSVWValidationTestCases)
651 | ----------------------------------------------------------------------
652 | Traceback (most recent call last):
653 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
654 | self.assertTrue(TYPES[type])
655 | AssertionError: False is not true
656 |
657 | ======================================================================
658 | FAIL: test manifest-validation#test220 csvt:NegativeValidationTest maxExclusive < minExclusive (__main__.CSVWValidationTestCases)
659 | ----------------------------------------------------------------------
660 | Traceback (most recent call last):
661 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
662 | self.assertTrue(TYPES[type])
663 | AssertionError: False is not true
664 |
665 | ======================================================================
666 | FAIL: test manifest-validation#test221 csvt:NegativeValidationTest maxInclusive = minExclusive (__main__.CSVWValidationTestCases)
667 | ----------------------------------------------------------------------
668 | Traceback (most recent call last):
669 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
670 | self.assertTrue(TYPES[type])
671 | AssertionError: False is not true
672 |
673 | ======================================================================
674 | FAIL: test manifest-validation#test222 csvt:NegativeValidationTest string datatype with minimum (__main__.CSVWValidationTestCases)
675 | ----------------------------------------------------------------------
676 | Traceback (most recent call last):
677 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
678 | self.assertTrue(TYPES[type])
679 | AssertionError: False is not true
680 |
681 | ======================================================================
682 | FAIL: test manifest-validation#test223 csvt:NegativeValidationTest string datatype with maxium (__main__.CSVWValidationTestCases)
683 | ----------------------------------------------------------------------
684 | Traceback (most recent call last):
685 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
686 | self.assertTrue(TYPES[type])
687 | AssertionError: False is not true
688 |
689 | ======================================================================
690 | FAIL: test manifest-validation#test224 csvt:NegativeValidationTest string datatype with minInclusive (__main__.CSVWValidationTestCases)
691 | ----------------------------------------------------------------------
692 | Traceback (most recent call last):
693 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
694 | self.assertTrue(TYPES[type])
695 | AssertionError: False is not true
696 |
697 | ======================================================================
698 | FAIL: test manifest-validation#test225 csvt:NegativeValidationTest string datatype with maxInclusive (__main__.CSVWValidationTestCases)
699 | ----------------------------------------------------------------------
700 | Traceback (most recent call last):
701 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
702 | self.assertTrue(TYPES[type])
703 | AssertionError: False is not true
704 |
705 | ======================================================================
706 | FAIL: test manifest-validation#test226 csvt:NegativeValidationTest string datatype with minExclusive (__main__.CSVWValidationTestCases)
707 | ----------------------------------------------------------------------
708 | Traceback (most recent call last):
709 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
710 | self.assertTrue(TYPES[type])
711 | AssertionError: False is not true
712 |
713 | ======================================================================
714 | FAIL: test manifest-validation#test227 csvt:NegativeValidationTest string datatype with maxExclusive (__main__.CSVWValidationTestCases)
715 | ----------------------------------------------------------------------
716 | Traceback (most recent call last):
717 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
718 | self.assertTrue(TYPES[type])
719 | AssertionError: False is not true
720 |
721 | ======================================================================
722 | FAIL: test manifest-validation#test230 csvt:NegativeValidationTest failing minLength with separator (__main__.CSVWValidationTestCases)
723 | ----------------------------------------------------------------------
724 | Traceback (most recent call last):
725 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
726 | self.assertTrue(TYPES[type])
727 | AssertionError: False is not true
728 |
729 | ======================================================================
730 | FAIL: test manifest-validation#test232 csvt:NegativeValidationTest single column primaryKey violation (__main__.CSVWValidationTestCases)
731 | ----------------------------------------------------------------------
732 | Traceback (most recent call last):
733 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
734 | self.assertTrue(TYPES[type])
735 | AssertionError: False is not true
736 |
737 | ======================================================================
738 | FAIL: test manifest-validation#test234 csvt:NegativeValidationTest multiple column primaryKey violation (__main__.CSVWValidationTestCases)
739 | ----------------------------------------------------------------------
740 | Traceback (most recent call last):
741 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
742 | self.assertTrue(TYPES[type])
743 | AssertionError: False is not true
744 |
745 | ======================================================================
746 | FAIL: test manifest-validation#test243 csvt:NegativeValidationTest invalid datatype @id (__main__.CSVWValidationTestCases)
747 | ----------------------------------------------------------------------
748 | Traceback (most recent call last):
749 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
750 | self.assertTrue(TYPES[type])
751 | AssertionError: False is not true
752 |
753 | ======================================================================
754 | FAIL: test manifest-validation#test244 csvt:NegativeValidationTest invalid datatype @id (__main__.CSVWValidationTestCases)
755 | ----------------------------------------------------------------------
756 | Traceback (most recent call last):
757 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
758 | self.assertTrue(TYPES[type])
759 | AssertionError: False is not true
760 |
761 | ======================================================================
762 | FAIL: test manifest-validation#test247 csvt:NegativeValidationTest date format (extra milliseconds) (__main__.CSVWValidationTestCases)
763 | ----------------------------------------------------------------------
764 | Traceback (most recent call last):
765 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
766 | self.assertTrue(TYPES[type])
767 | AssertionError: False is not true
768 |
769 | ======================================================================
770 | FAIL: test manifest-validation#test251 csvt:NegativeValidationTest missing source reference (__main__.CSVWValidationTestCases)
771 | ----------------------------------------------------------------------
772 | Traceback (most recent call last):
773 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
774 | self.assertTrue(TYPES[type])
775 | AssertionError: False is not true
776 |
777 | ======================================================================
778 | FAIL: test manifest-validation#test252 csvt:NegativeValidationTest missing destination reference column (__main__.CSVWValidationTestCases)
779 | ----------------------------------------------------------------------
780 | Traceback (most recent call last):
781 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
782 | self.assertTrue(TYPES[type])
783 | AssertionError: False is not true
784 |
785 | ======================================================================
786 | FAIL: test manifest-validation#test253 csvt:NegativeValidationTest missing destination table (__main__.CSVWValidationTestCases)
787 | ----------------------------------------------------------------------
788 | Traceback (most recent call last):
789 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
790 | self.assertTrue(TYPES[type])
791 | AssertionError: False is not true
792 |
793 | ======================================================================
794 | FAIL: test manifest-validation#test257 csvt:NegativeValidationTest foreign key no referenced row (__main__.CSVWValidationTestCases)
795 | ----------------------------------------------------------------------
796 | Traceback (most recent call last):
797 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
798 | self.assertTrue(TYPES[type])
799 | AssertionError: False is not true
800 |
801 | ======================================================================
802 | FAIL: test manifest-validation#test258 csvt:NegativeValidationTest foreign key multiple referenced rows (__main__.CSVWValidationTestCases)
803 | ----------------------------------------------------------------------
804 | Traceback (most recent call last):
805 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
806 | self.assertTrue(TYPES[type])
807 | AssertionError: False is not true
808 |
809 | ======================================================================
810 | FAIL: test manifest-validation#test261 csvt:NegativeValidationTest maxLength < minLength (__main__.CSVWValidationTestCases)
811 | ----------------------------------------------------------------------
812 | Traceback (most recent call last):
813 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
814 | self.assertTrue(TYPES[type])
815 | AssertionError: False is not true
816 |
817 | ======================================================================
818 | FAIL: test manifest-validation#test267 csvt:NegativeValidationTest @id on datatype is invalid (eg starts with _:) (__main__.CSVWValidationTestCases)
819 | ----------------------------------------------------------------------
820 | Traceback (most recent call last):
821 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
822 | self.assertTrue(TYPES[type])
823 | AssertionError: False is not true
824 |
825 | ======================================================================
826 | FAIL: test manifest-validation#test269 csvt:NegativeValidationTest `format` for a boolean datatype is a string but in the wrong form (eg YN) (__main__.CSVWValidationTestCases)
827 | ----------------------------------------------------------------------
828 | Traceback (most recent call last):
829 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
830 | self.assertTrue(TYPES[type])
831 | AssertionError: False is not true
832 |
833 | ======================================================================
834 | FAIL: test manifest-validation#test271 csvt:NegativeValidationTest foreign key includes an invalid property (eg `dc:description`) (__main__.CSVWValidationTestCases)
835 | ----------------------------------------------------------------------
836 | Traceback (most recent call last):
837 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
838 | self.assertTrue(TYPES[type])
839 | AssertionError: False is not true
840 |
841 | ======================================================================
842 | FAIL: test manifest-validation#test272 csvt:NegativeValidationTest foreign key reference includes an invalid property (eg `dc:description`) (__main__.CSVWValidationTestCases)
843 | ----------------------------------------------------------------------
844 | Traceback (most recent call last):
845 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
846 | self.assertTrue(TYPES[type])
847 | AssertionError: False is not true
848 |
849 | ======================================================================
850 | FAIL: test manifest-validation#test278 csvt:NegativeValidationTest CSV has more headers than there are columns in the metadata (__main__.CSVWValidationTestCases)
851 | ----------------------------------------------------------------------
852 | Traceback (most recent call last):
853 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
854 | self.assertTrue(TYPES[type])
855 | AssertionError: False is not true
856 |
857 | ======================================================================
858 | FAIL: test manifest-validation#test279 csvt:NegativeValidationTest duration not matching xsd pattern (__main__.CSVWValidationTestCases)
859 | ----------------------------------------------------------------------
860 | Traceback (most recent call last):
861 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
862 | self.assertTrue(TYPES[type])
863 | AssertionError: False is not true
864 |
865 | ======================================================================
866 | FAIL: test manifest-validation#test280 csvt:NegativeValidationTest dayTimeDuration not matching xsd pattern (__main__.CSVWValidationTestCases)
867 | ----------------------------------------------------------------------
868 | Traceback (most recent call last):
869 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
870 | self.assertTrue(TYPES[type])
871 | AssertionError: False is not true
872 |
873 | ======================================================================
874 | FAIL: test manifest-validation#test281 csvt:NegativeValidationTest yearMonthDuration not matching xsd pattern (__main__.CSVWValidationTestCases)
875 | ----------------------------------------------------------------------
876 | Traceback (most recent call last):
877 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
878 | self.assertTrue(TYPES[type])
879 | AssertionError: False is not true
880 |
881 | ======================================================================
882 | FAIL: test manifest-validation#test286 csvt:NegativeValidationTest invalid ##0 1,234 (__main__.CSVWValidationTestCases)
883 | ----------------------------------------------------------------------
884 | Traceback (most recent call last):
885 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
886 | self.assertTrue(TYPES[type])
887 | AssertionError: False is not true
888 |
889 | ======================================================================
890 | FAIL: test manifest-validation#test287 csvt:NegativeValidationTest invalid ##0 123.4 (__main__.CSVWValidationTestCases)
891 | ----------------------------------------------------------------------
892 | Traceback (most recent call last):
893 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
894 | self.assertTrue(TYPES[type])
895 | AssertionError: False is not true
896 |
897 | ======================================================================
898 | FAIL: test manifest-validation#test288 csvt:NegativeValidationTest invalid #,#00 1 (__main__.CSVWValidationTestCases)
899 | ----------------------------------------------------------------------
900 | Traceback (most recent call last):
901 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
902 | self.assertTrue(TYPES[type])
903 | AssertionError: False is not true
904 |
905 | ======================================================================
906 | FAIL: test manifest-validation#test289 csvt:NegativeValidationTest invalid #,#00 1234 (__main__.CSVWValidationTestCases)
907 | ----------------------------------------------------------------------
908 | Traceback (most recent call last):
909 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
910 | self.assertTrue(TYPES[type])
911 | AssertionError: False is not true
912 |
913 | ======================================================================
914 | FAIL: test manifest-validation#test290 csvt:NegativeValidationTest invalid #,#00 12,34 (__main__.CSVWValidationTestCases)
915 | ----------------------------------------------------------------------
916 | Traceback (most recent call last):
917 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
918 | self.assertTrue(TYPES[type])
919 | AssertionError: False is not true
920 |
921 | ======================================================================
922 | FAIL: test manifest-validation#test291 csvt:NegativeValidationTest invalid #,#00 12,34,567 (__main__.CSVWValidationTestCases)
923 | ----------------------------------------------------------------------
924 | Traceback (most recent call last):
925 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
926 | self.assertTrue(TYPES[type])
927 | AssertionError: False is not true
928 |
929 | ======================================================================
930 | FAIL: test manifest-validation#test292 csvt:NegativeValidationTest invalid #,##,#00 1 (__main__.CSVWValidationTestCases)
931 | ----------------------------------------------------------------------
932 | Traceback (most recent call last):
933 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
934 | self.assertTrue(TYPES[type])
935 | AssertionError: False is not true
936 |
937 | ======================================================================
938 | FAIL: test manifest-validation#test293 csvt:NegativeValidationTest invalid #,##,#00 1234 (__main__.CSVWValidationTestCases)
939 | ----------------------------------------------------------------------
940 | Traceback (most recent call last):
941 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
942 | self.assertTrue(TYPES[type])
943 | AssertionError: False is not true
944 |
945 | ======================================================================
946 | FAIL: test manifest-validation#test294 csvt:NegativeValidationTest invalid #,##,#00 12,34 (__main__.CSVWValidationTestCases)
947 | ----------------------------------------------------------------------
948 | Traceback (most recent call last):
949 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
950 | self.assertTrue(TYPES[type])
951 | AssertionError: False is not true
952 |
953 | ======================================================================
954 | FAIL: test manifest-validation#test295 csvt:NegativeValidationTest invalid #,##,#00 1,234,567 (__main__.CSVWValidationTestCases)
955 | ----------------------------------------------------------------------
956 | Traceback (most recent call last):
957 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
958 | self.assertTrue(TYPES[type])
959 | AssertionError: False is not true
960 |
961 | ======================================================================
962 | FAIL: test manifest-validation#test296 csvt:NegativeValidationTest invalid #0.# 12.34 (__main__.CSVWValidationTestCases)
963 | ----------------------------------------------------------------------
964 | Traceback (most recent call last):
965 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
966 | self.assertTrue(TYPES[type])
967 | AssertionError: False is not true
968 |
969 | ======================================================================
970 | FAIL: test manifest-validation#test297 csvt:NegativeValidationTest invalid #0.# 1,234.5 (__main__.CSVWValidationTestCases)
971 | ----------------------------------------------------------------------
972 | Traceback (most recent call last):
973 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
974 | self.assertTrue(TYPES[type])
975 | AssertionError: False is not true
976 |
977 | ======================================================================
978 | FAIL: test manifest-validation#test298 csvt:NegativeValidationTest invalid #0.0 1 (__main__.CSVWValidationTestCases)
979 | ----------------------------------------------------------------------
980 | Traceback (most recent call last):
981 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
982 | self.assertTrue(TYPES[type])
983 | AssertionError: False is not true
984 |
985 | ======================================================================
986 | FAIL: test manifest-validation#test299 csvt:NegativeValidationTest invalid #0.0 12.34 (__main__.CSVWValidationTestCases)
987 | ----------------------------------------------------------------------
988 | Traceback (most recent call last):
989 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
990 | self.assertTrue(TYPES[type])
991 | AssertionError: False is not true
992 |
993 | ======================================================================
994 | FAIL: test manifest-validation#test300 csvt:NegativeValidationTest invalid #0.0# 1 (__main__.CSVWValidationTestCases)
995 | ----------------------------------------------------------------------
996 | Traceback (most recent call last):
997 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
998 | self.assertTrue(TYPES[type])
999 | AssertionError: False is not true
1000 |
1001 | ======================================================================
1002 | FAIL: test manifest-validation#test301 csvt:NegativeValidationTest invalid #0.0# 12.345 (__main__.CSVWValidationTestCases)
1003 | ----------------------------------------------------------------------
1004 | Traceback (most recent call last):
1005 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
1006 | self.assertTrue(TYPES[type])
1007 | AssertionError: False is not true
1008 |
1009 | ======================================================================
1010 | FAIL: test manifest-validation#test302 csvt:NegativeValidationTest invalid #0.0#,# 1 (__main__.CSVWValidationTestCases)
1011 | ----------------------------------------------------------------------
1012 | Traceback (most recent call last):
1013 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
1014 | self.assertTrue(TYPES[type])
1015 | AssertionError: False is not true
1016 |
1017 | ======================================================================
1018 | FAIL: test manifest-validation#test303 csvt:NegativeValidationTest invalid #0.0#,# 12.345 (__main__.CSVWValidationTestCases)
1019 | ----------------------------------------------------------------------
1020 | Traceback (most recent call last):
1021 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
1022 | self.assertTrue(TYPES[type])
1023 | AssertionError: False is not true
1024 |
1025 | ======================================================================
1026 | FAIL: test manifest-validation#test304 csvt:NegativeValidationTest invalid #0.0#,# 12.34,567 (__main__.CSVWValidationTestCases)
1027 | ----------------------------------------------------------------------
1028 | Traceback (most recent call last):
1029 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test
1030 | self.assertTrue(TYPES[type])
1031 | AssertionError: False is not true
1032 |
1033 | ----------------------------------------------------------------------
1034 | Ran 276 tests in 139.484s
1035 |
1036 | FAILED (failures=123, errors=4)
1037 |
--------------------------------------------------------------------------------
/test/metadata_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import os
3 | parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
4 | os.sys.path.insert(0,parentdir)
5 | import pprint
6 | from pycsvw import metadata
7 | from pycsvw.metadata import Model
8 |
9 | __author__ = 'sebastian'
10 |
11 | import unittest
12 |
13 |
class DanBrickleyCase(unittest.TestCase):
    """Tests for validating, normalizing and merging CSVW metadata.

    Each test feeds plain dictionaries to ``metadata.validate`` or
    ``metadata.merge`` and compares the outcome with a hand-written
    expectation.
    """

    @staticmethod
    def _countries_table(with_url=True):
        """Return a fresh description of the ``countries.csv`` example table.

        A new dictionary is built on every call so the tests never share
        state with each other.

        :param with_url: when false, the ``url`` property is left out.
        :return: the table description as a dictionary.
        """
        table = {}
        if with_url:
            table["url"] = "http://example.org/countries.csv"
        table["tableSchema"] = {
            "columns": [{
                "name": "countryCode",
                "datatype": "string",
                "propertyUrl": "http://www.geonames.org/ontology{#_name}"
            }, {
                "name": "latitude",
                "datatype": "number"
            }, {
                "name": "longitude",
                "datatype": "number"
            }, {
                "name": "name",
                "datatype": "string"
            }],
            "aboutUrl": "http://example.org/countries.csv{#countryCode}",
            "propertyUrl": "http://schema.org/{_name}",
            "primaryKey": "countryCode"
        }
        return table

    @unittest.skip("@context appears to be not up-to-date")
    def test_dan_brickley(self):
        # NOTE(review): ``CSVW`` is not among this module's visible imports,
        # and these paths start with 'csvwparser/' while validator_test.py
        # reads its fixtures from 'pycsvw/testdata' -- confirm both before
        # removing the skip.
        testfile = 'csvwparser/testdata/csvw-template/example.csv'
        metafile = 'csvwparser/testdata/csvw-template/example.csv-metadata.json'
        csvw = CSVW(path=testfile, metadata_path=metafile)
        self.assertNotEqual(csvw, None)
        self.assertNotEqual(csvw.metadata, None)
        title = csvw.metadata['dc:title']
        self.assertEqual(title, "My Spreadsheet")
        # TODO write tests

    def test_positive_context(self):
        # Both context forms must validate: namespace plus a properties
        # object, and the namespace as a plain string.
        for context in (
            ["http://www.w3.org/ns/csvw", {"@language": "en"}],
            "http://www.w3.org/ns/csvw",
        ):
            A = {
                "@context": context,
                "tables": [self._countries_table()]
            }
            result = metadata.validate(A)
            self.assertIsInstance(result, Model)

    def test_negative_context(self):
        # context is missing
        A = {
            "tables": [self._countries_table()]
        }
        result = metadata.validate(A)
        self.assertFalse(result)
        # wrong context: an unknown property in the context object, then a
        # namespace string that is not the CSVW one
        for context in (
            ["http://www.w3.org/ns/csvw", {"somethingwrong": "en"}],
            "http://www.w3.org/ns/csv",
        ):
            A = {
                "@context": context,
                "tables": [self._countries_table()]
            }
            result = metadata.validate(A)
            self.assertFalse(result)

    def test_negative_validate(self):
        # url is missing
        A = {
            "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}],
            "tables": [self._countries_table(with_url=False)]
        }
        result = metadata.validate(A)
        self.assertFalse(result)

    def test_positive_validate(self):
        # A valid two-table group must serialize back to the input document.
        self.maxDiff = None
        A = {
            "@context": "http://www.w3.org/ns/csvw",
            "tables": [self._countries_table(), {
                "url": "http://example.org/country_slice.csv",
                "tableSchema": {
                    "columns": [{
                        "name": "countryRef",
                        "valueUrl": "http://example.org/countries.csv{#countryRef}"
                    }, {
                        "name": "year",
                        "datatype": "gYear"
                    }, {
                        "name": "population",
                        "datatype": "integer"
                    }],
                    "foreignKeys": [{
                        "columnReference": "countryRef",
                        "reference": {
                            "resource": "http://example.org/countries.csv",
                            "columnReference": "countryCode"
                        }
                    }]
                }
            }]
        }
        result = metadata.validate(A)
        self.assertIsInstance(result, Model)
        json_res = result.json()
        # Parenthesized single-argument form prints the same text on
        # Python 2 and parses on Python 3.
        print(json_res)
        self.assertEqual(json_res, A)

    def test_normalize(self):
        # The default language from @context is expected to be pushed into
        # plain-string titles, and the single table wrapped in "tables".
        self.maxDiff = None
        A = {
            "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}],
            "@type": "Table",
            "url": "http://example.com/table.csv",
            "dc:title": [
                "The title of this Table",
                {"@value": "Der Titel dieser Tabelle", "@language": "de"}
            ]
        }
        norm = {
            "@context": "http://www.w3.org/ns/csvw",
            "tables": [
                {
                    "@type": "Table",
                    "url": "http://example.com/table.csv",
                    "dc:title": [
                        {"@value": "The title of this Table", "@language": "en"},
                        {"@value": "Der Titel dieser Tabelle", "@language": "de"}
                    ]
                }
            ],
        }
        val = metadata.validate(A)
        val.normalize()
        json_res = val.json()
        print(json_res)
        self.assertEqual(json_res, norm)

    def test_normalize2(self):
        # Relative URLs are expected to be resolved against @base.
        self.maxDiff = None
        A = {
            "@context": ["http://www.w3.org/ns/csvw", {"@base": "http://example.com/"}],
            "@type": "Table",
            "url": "table.csv",
            "schema:url": {"@id": "table.csv"}
        }
        norm = {
            "@context": "http://www.w3.org/ns/csvw",
            "tables": [
                {
                    "@type": "Table",
                    "url": "http://example.com/table.csv",
                    "schema:url": {"@id": "http://example.com/table.csv"}
                }
            ]
        }
        val = metadata.validate(A)
        val.normalize()
        json_res = val.json()
        print(json_res)
        self.assertEqual(json_res, norm)

    def test_merge(self):
        # Merging a table group (A) with a single-table document (B) that
        # describes the same resource must yield the ``merged`` document.
        self.maxDiff = None
        A = {
            "@context": ["http://www.w3.org/ns/csvw", {"@language": "en",
                                                       "@base": "http://example.com/"}
                         ],
            "tables": [{
                "url": "doc1.csv",
                "dc:title": "foo",
                "tableDirection": "ltr",
                "tableSchema": {
                    "aboutUrl": "{#foo}",
                    "columns": [{
                        "name": "foo",
                        "titles": "Foo",
                        "required": True
                    }, {
                        "name": "bar"
                    }]
                }
            }, {
                "url": "doc2.csv"
            }]
        }

        B = {
            "@context": "http://www.w3.org/ns/csvw",
            "url": "http://example.com/doc1.csv",
            "dc:description": "bar",
            "tableSchema": {
                "propertyUrl": "{#_name}",
                "columns": [{
                    "titles": "Foo",
                    "required": False
                }, {
                    "name": "bar"
                }, {
                }]
            }
        }

        merged = {
            "@context": "http://www.w3.org/ns/csvw",
            "tables": [{
                "url": "http://example.com/doc1.csv",
                "dc:title": {"@value": "foo", "@language": "en"},
                "dc:description": {"@value": "bar"},
                "tableDirection": "ltr",
                "tableSchema": {
                    "aboutUrl": "{#foo}",
                    "propertyUrl": "{#_name}",
                    "columns": [{
                        "name": "foo",
                        "titles": {"en": ["Foo"]},
                        "required": True
                    }, {
                        "name": "bar"
                    }]
                }
            }, {
                "url": "http://example.com/doc2.csv"
            }]
        }

        result = metadata.merge([A, B])
        pprint.pprint(result.json())
        self.assertEqual(merged, result.json())
383 |
384 |
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
387 |
--------------------------------------------------------------------------------
/test/validator_test.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import unittest
3 | import os
4 | parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
5 | os.sys.path.insert(0,parentdir)
6 | from pycsvw import validator
7 |
8 |
class TestCsvValidator(unittest.TestCase):
    """Runs ``validator.validate_file`` against the CSV fixtures in the repo."""

    def _assert_error_report(self, csv_rel, schema_rel, expected_rel):
        """Validate a CSV/metadata pair and compare the error text.

        All three arguments are paths relative to ``parentdir``. The expected
        report is read before validation runs, and both texts are compared
        with trailing whitespace removed.
        """
        csv_file = os.path.join(parentdir, csv_rel)
        schema_file = os.path.join(parentdir, schema_rel)
        expected_file = os.path.join(parentdir, expected_rel)

        with open(expected_file, 'r') as handle:
            wanted = handle.read()

        _, report = validator.validate_file(csv_file, schema_file)
        self.assertEqual(wanted.rstrip(), report.rstrip())

    def test_validate_csv_pass(self):
        # The template example and its metadata must validate successfully.
        csv_file = os.path.join(
            parentdir, "pycsvw/testdata/csvw-template/example.csv")
        schema_file = os.path.join(
            parentdir, "pycsvw/testdata/csvw-template/example.csv-metadata.json")
        passed, _ = validator.validate_file(csv_file, schema_file)
        self.assertTrue(passed)

    def test_validate_csv_column_missing(self):
        self._assert_error_report(
            "pycsvw/testdata/tree-ops.csv",
            "pycsvw/testdata/test124-user-metadata.json",
            "pycsvw/testdata/validate-result-missing-column.txt",
        )

    def test_validate_csv_required_missing(self):
        self._assert_error_report(
            "pycsvw/testdata/test125.csv",
            "pycsvw/testdata/test125-metadata.json",
            "pycsvw/testdata/validate-result-required-fail.txt",
        )

    def test_validate_csv_primary_key_fail(self):
        self._assert_error_report(
            "pycsvw/testdata/test234.csv",
            "pycsvw/testdata/test234-metadata.json",
            "pycsvw/testdata/validate-result-primary-key-fail.txt",
        )
49 |
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
52 |
--------------------------------------------------------------------------------