├── .gitignore ├── LICENSE.md ├── README.md ├── earl.ttl ├── pycsvw ├── __init__.py ├── built_in_datatypes.py ├── csv_parser.py ├── json_generator.py ├── logger.py ├── main.py ├── metadata.py ├── metadata_extractor.py ├── parser_exceptions.py ├── test.py ├── testdata │ ├── csvw-template │ │ ├── LICENSE │ │ ├── README.md │ │ ├── example.csv │ │ └── example.csv-metadata.json │ ├── test124-user-metadata.json │ ├── test125-metadata.json │ ├── test125.csv │ ├── test234-metadata.json │ ├── test234.csv │ ├── tree-ops.csv │ ├── validate-result-missing-column.txt │ ├── validate-result-primary-key-fail.txt │ └── validate-result-required-fail.txt └── validator.py ├── setup.py └── test ├── __init__.py ├── csvw_implementation_report.py ├── csvw_json_test_cases.py ├── csvw_validation_single_test.py ├── csvw_validation_test_cases.py ├── doap.ttl ├── errors_10-12-15.txt ├── metadata_test.py └── validator_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.DS_Store 3 | */_build/* 4 | *.py~ 5 | *.~lock.*# 6 | .idea/ 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *,cover 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | 60 | # Sphinx documentation 61 | docs/_build/ 62 | 63 | # PyBuilder 64 | target/ 65 | 66 | #visual studio project 67 | *.pyproj 68 | *.suo 69 | *.sln 70 | *.pyproj.user 71 | .vs/ -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Sebastian Neumaier 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# Valid datatypes
#
# Maps every datatype name accepted by this module to the term that identifies
# it. The first group holds the XML Schema datatypes under their own names;
# the second group holds shorthand aliases ('any', 'binary', 'datetime', ...)
# that point at XSD, RDF or CSVW terms.
DATATYPES = {
    'anyAtomicType': XSD.anyAtomicType,
    'anyURI': XSD.anyURI,
    # was XSD.basee65Binary (typo), which does not name the base64Binary type
    'base64Binary': XSD.base64Binary,
    'boolean': XSD.boolean,
    'byte': XSD.byte,
    'date': XSD.date,
    'dateTime': XSD.dateTime,
    'dayTimeDuration': XSD.dayTimeDuration,
    'dateTimeStamp': XSD.dateTimeStamp,
    'decimal': XSD.decimal,
    'double': XSD.double,
    'duration': XSD.duration,
    'float': XSD.float,
    'ENTITY': XSD.ENTITY,
    'gDay': XSD.gDay,
    'gMonth': XSD.gMonth,
    'gMonthDay': XSD.gMonthDay,
    'gYear': XSD.gYear,
    'gYearMonth': XSD.gYearMonth,
    'hexBinary': XSD.hexBinary,
    'int': XSD.int,
    'integer': XSD.integer,
    'language': XSD.language,
    'long': XSD.long,
    'Name': XSD.Name,
    'NCName': XSD.NCName,
    'negativeInteger': XSD.negativeInteger,
    'NMTOKEN': XSD.NMTOKEN,
    'nonNegativeInteger': XSD.nonNegativeInteger,
    'nonPositiveInteger': XSD.nonPositiveInteger,
    'normalizedString': XSD.normalizedString,
    'NOTATION': XSD.NOTATION,
    'positiveInteger': XSD.positiveInteger,
    # was XSD.Qname (typo); the XML Schema type is spelled QName
    'QName': XSD.QName,
    'short': XSD.short,
    'string': XSD.string,
    'time': XSD.time,
    'token': XSD.token,
    'unsignedByte': XSD.unsignedByte,
    'unsignedInt': XSD.unsignedInt,
    'unsignedLong': XSD.unsignedLong,
    'unsignedShort': XSD.unsignedShort,
    'yearMonthDuration': XSD.yearMonthDuration,

    'any': XSD.anyAtomicType,
    'binary': XSD.base64Binary,
    'datetime': XSD.dateTime,
    'html': RDF.HTML,
    'json': CSVW.JSON,
    'number': XSD.double,
    'xml': RDF.XMLLiteral,
}


def is_built_in_datatype(value):
    """Return True if *value* is one of the names listed in DATATYPES."""
    return value in DATATYPES
The default is " (such that "" is used to escape " within an escaped cell). 15 | header row count 16 | The number of header rows (following the skipped rows) in the file, set by the header or headerRowCount property of a dialect description. The default is 1. A value other than 0 will mean that the source numbers of rows will be different from their numbers. 17 | line terminators 18 | The characters that can be used at the end of a row, set by the lineTerminators property of a dialect description. The default is [CRLF, LF]. 19 | quote character 20 | The character that is used around escaped cells, or null, set by the quoteChar property of a dialect description. The default is ". 21 | skip blank rows 22 | Indicates whether to ignore wholly empty rows (ie rows in which all the cells are empty), set by the skipBlankRows property of a dialect description. The default is false. A value other than false may mean that the source numbers of rows are different from their numbers. 23 | skip columns 24 | The number of columns to skip at the beginning of each row, set by the skipColumns property of a dialect description. The default is 0. A value other than 0 will mean that the source numbers of columns will be different from their numbers. 25 | skip rows 26 | The number of rows to skip at the beginning of the file, before a header row or tabular data, set by the skipRows property of a dialect description. The default is 0. A value greater than 0 will mean that the source numbers of rows will be different from their numbers. 27 | trim 28 | Indicates whether to trim whitespace around cells; may be true, false, start, or end, set by the skipInitialSpace or trim property of a dialect description. The default is false. 
def parse_row(row, settings):
    """Split one line of CSV text into a list of cell values.

    Follows the "parse a row" steps of the tabular data model:
    http://www.w3.org/TR/2015/WD-tabular-data-model-20150416/#dfn-parse-a-row

    :param row: the text of a single row, without its line terminator
    :param settings: dialect settings; the keys 'escape character',
        'quote character' and 'delimiter' are read
    :return: list of cell values, each stripped of surrounding whitespace
    :raises ParserException: if a quote character opens in the middle of a
        cell, or a closing quote is followed by anything but the delimiter
    """
    escape = settings['escape character']
    quote = settings['quote character']
    delimiter = settings['delimiter']

    cell_values = []
    current = []  # characters of the cell being built
    quoted = False
    length = len(row)

    i = 0
    while i < length:
        char = row[i]
        has_next = i + 1 < length

        # A dedicated escape character (different from the quote character)
        # makes the following character literal. Both characters are consumed;
        # the previous implementation left the second one to be processed again.
        if char == escape and escape != quote and has_next:
            current.append(row[i + 1])
            i += 2
            continue

        if char == quote:
            # A doubled quote is an escaped quote, but only once a cell has
            # started (inside quotes, or after some unquoted text). At the very
            # start of a cell the first quote opens the cell, so that `""`
            # yields an empty value.
            if (escape == quote and has_next and row[i + 1] == quote
                    and (quoted or current)):
                current.append(quote)
                i += 2
                continue
            if not quoted:
                if current:
                    raise ParserException('Quotation error: ' + row)
                quoted = True
            else:
                quoted = False
                if has_next and row[i + 1] != delimiter:
                    raise ParserException('Quotation error: ' + row)
        elif char == delimiter:
            if quoted:
                current.append(delimiter)
            else:
                # NOTE: cells are always stripped here; the 'trim' setting is
                # not consulted (unchanged from the previous behaviour).
                cell_values.append(''.join(current).strip())
                current = []
        else:
            current.append(char)
        i += 1

    cell_values.append(''.join(current).strip())
    return cell_values
99 | # Repeat the following the number of times indicated by skip rows 100 | for i in xrange(len(rows)): 101 | row = rows[i] 102 | if i >= settings['skip rows']: 103 | break 104 | if settings['comment prefix']: 105 | if row.startswith(settings['comment prefix']): 106 | com = row.strip(settings['comment prefix']).strip() 107 | M['rdfs:comment'].append(com) 108 | elif row.strip(): 109 | M['rdfs:comment'].append(row) 110 | source_row_number += 1 111 | 112 | j = i 113 | # Repeat the following the number of times indicated by header row count 114 | for j in xrange(i, len(rows)): 115 | row = rows[j] 116 | if j >= settings['header row count']: 117 | break 118 | if settings['comment prefix']: 119 | if row.startswith(settings['comment prefix']): 120 | com = row.strip(settings['comment prefix']).strip() 121 | M['rdfs:comment'].append(com) 122 | else: 123 | # Otherwise, parse the row to provide a list of cell values 124 | cells = parse_row(row, settings) 125 | # Remove the first skip columns number of values from the list of cell values 126 | cells = cells[settings['skip columns']:] 127 | if len(M['tableSchema']['columns']) == 0: 128 | M['tableSchema']['columns'] = [{'titles': []} for _ in range(len(cells))] 129 | for cell_i, v in enumerate(cells): 130 | value = v.strip() 131 | if value == '': 132 | pass 133 | else: 134 | M['tableSchema']['columns'][cell_i]['titles'].append(value) 135 | source_row_number += 1 136 | 137 | row_number = 1 138 | for k in xrange(j, len(rows)): 139 | row = rows[k] 140 | source_column_number = 1 141 | if settings['comment prefix']: 142 | if row.startswith(settings['comment prefix']): 143 | com = row.strip(settings['comment prefix']).strip() 144 | M['rdfs:comment'].append(com) 145 | else: 146 | cells = parse_row(row, settings) 147 | if settings['skip blank rows'] and len(cells) == len([_ for v in cells if v == '']): 148 | pass 149 | else: 150 | R = Row(table=T, number=row_number, source_number=row_number) 151 | T.rows.append(R) 152 | # Remove the 
class Cell:
    """One cell of a parsed table, linked to its table, column and row."""

    def __init__(self, value, table, column, row):
        # where the cell lives
        self.table, self.column, self.row = table, column, row
        # the raw text and the value start out identical
        self.string_value = self.value = value
        self.errors = []
        self.text_direction = 'ltr'
        # URL annotations are unset until something assigns them
        self.about_url = self.property_url = self.value_url = None

    def __repr__(self):
        position = ' '.join([str(self.row), str(self.column)])
        return 'Cell (' + position + ')'


class Column:
    """One column of a parsed table; collects the cells that belong to it."""

    def __init__(self, table, number, source_number):
        self.table = table
        # position among the kept columns, and position in the source file
        self.number, self.source_number = number, source_number
        self.name = None
        self.titles = []
        self.datatype = basestring
        self.virtual = self.suppress_output = False
        self.cells = []

    def __repr__(self):
        return ' '.join(['Column', str(self.number)])
import logging

logging.basicConfig(level=logging.WARNING,
                    format='%(asctime)s %(levelname)s %(message)s')
logger = logging.getLogger(__name__)


def _format_message(line, message, args):
    """Build the text of a log entry.

    :param line: line number to prefix the message with; skipped when falsy
    :param message: the message text
    :param args: iterable of extra values; each is appended as str(arg) + ','
    :return: the assembled message string
    """
    if line:
        message = str(line) + ': ' + message
    for arg in args:
        try:
            message += str(arg) + ','
        except Exception:
            # Best effort: a value that cannot be converted to str is left out
            # of the message rather than aborting the log call.
            pass
    return message


def warning(line, message, *args):
    """Log *message* (with line prefix and extra values) at WARNING level."""
    logger.warning(_format_message(line, message, args))


def error(line, message, *args):
    """Log *message* (with line prefix and extra values) at ERROR level."""
    logger.error(_format_message(line, message, args))


def debug(line, message, *args):
    """Log *message* (with line prefix and extra values) at DEBUG level."""
    logger.debug(_format_message(line, message, args))


def info(line, message, *args):
    """Log *message* (with line prefix and extra values) at INFO level."""
    # Pass the tuple itself, like the sibling helpers do. Unpacking it here
    # raised TypeError unless exactly one extra value was supplied.
    logger.info(_format_message(line, message, args))
class CSVW:
    """Parse a tabular data file and merge the metadata that describes it.

    After construction, ``self.table`` holds the table returned by
    ``csv_parser.parse`` and ``self.metadata`` the result of
    ``metadata.merge`` over the extracted metadata sources.
    """

    def __init__(self, url=None, path=None, handle=None, metadata_url=None, metadata_path=None, metadata_handle=None, date_parsing=False):
        """Read the table and its metadata.

        :param url: URL of the tabular data file
        :param path: local path of the tabular data file
        :param handle: already opened file-like object (testing only); when
            given it takes precedence over ``url`` and ``path``
        :param metadata_url: URL of a metadata document
        :param metadata_path: local path of a metadata document
        :param metadata_handle: already opened metadata file-like object
            (testing only)
        :param date_parsing: accepted but not used by this constructor
        :raises ValueError: if both ``url`` and ``path`` are given, if neither
            of ``url``, ``path``, ``handle`` is given, or if both
            ``metadata_url`` and ``metadata_path`` are given
        """
        # http://www.w3.org/TR/2015/WD-tabular-data-model-20150416/#processing-tables
        owns_handle = False  # True only for a file this constructor opened
        if handle:
            logger.warning('"handle" is used only for testing purposes')
        elif path and url:
            # Must be tested before the single-argument branches; placed after
            # them this check could never be reached.
            raise ValueError("only one argument of url and path allowed")
        elif url:
            url_resp = urllib2.urlopen(url)
            try:
                handle = StringIO(url_resp.read())
            finally:
                url_resp.close()
        elif path:
            handle = open(path, 'rb')
            owns_handle = True
        else:
            raise ValueError("url or path argument required")

        try:
            if metadata_path and metadata_url:
                raise ValueError("only one argument of metadata_url and metadata_path allowed")
            elif metadata_handle:
                logger.warning('"metadata_handle" is used only for testing purposes')
            elif metadata_url:
                meta_resp = urllib2.urlopen(metadata_url)
                try:
                    metadata_handle = StringIO(meta_resp.read())
                finally:
                    meta_resp.close()
            elif metadata_path:
                # NOTE(review): left open on purpose; whether
                # metadata_extraction keeps reading from the handle after it
                # returns is not visible here.
                metadata_handle = open(metadata_path, 'rb')

            # Retrieve the tabular data file.
            self.table, embedded_metadata = csv_parser.parse(handle, url)
        finally:
            # parse() reads the whole handle up front, so a file opened above
            # is no longer needed. Caller-supplied handles are not closed.
            if owns_handle:
                handle.close()

        # TODO create settings using arguments or provided metadata
        sources = metadata_extractor.metadata_extraction(url, metadata_handle, embedded_metadata=embedded_metadata)
        self.metadata = metadata.merge(sources)

    def to_rdf(self):
        """Not implemented yet."""
        pass

    def to_json(self):
        """Run the minimal-mode JSON generation for the first table."""
        # TODO group of tables?
        json_generator.minimal_mode(self.table, self.metadata.json()['tables'][0])
result = Uri() 61 | result.value = meta 62 | return result 63 | 64 | 65 | class ColumnReference(Property): 66 | 67 | def evaluate(self, meta, params, default=None, line=None): 68 | result = ColumnReference() 69 | if isinstance(meta, basestring): 70 | # TODO must match the name on a column description object 71 | logger.debug(line, 'Column Reference property: ', meta) 72 | result.value = meta 73 | return result 74 | elif isinstance(meta, list): 75 | if not meta: 76 | logger.warning(line, 'the supplied value is an empty array') 77 | return result 78 | for m in meta: 79 | if isinstance(m, basestring): 80 | # TODO must match the name on a column description object 81 | pass 82 | else: 83 | logger.warning(line, 'the values in the supplied array are not strings: ', meta) 84 | result.value = meta 85 | return result 86 | 87 | # TODO must match the name on a column description object 88 | logger.debug(line, 'Column Reference property: ', meta) 89 | result.value = meta 90 | return result 91 | else: 92 | logger.warning(line, 'the supplied value is not a string or array: ', meta) 93 | return result 94 | 95 | 96 | class NaturalLanguage(Property): 97 | 98 | def evaluate(self, meta, params, default=None, line=None): 99 | # strings 100 | # arrays 101 | # objects 102 | # TODO 103 | if isinstance(meta, dict): 104 | for k in meta: 105 | if not language_tags.tags.check(k): 106 | logger.error(line, 'Natural language properties MUST be language codes as defined by [BCP47]: ', meta) 107 | return False 108 | logger.debug(line, 'Natural language property: ', meta) 109 | result = NaturalLanguage() 110 | result.value = meta 111 | return result 112 | 113 | def normalize(self, params): 114 | if isinstance(self.value, list): 115 | if 'default_language' in params: 116 | value = {params['default_language']: list(self.value)} 117 | else: 118 | value = {'und': list(self.value)} 119 | self.value = value 120 | else: 121 | self.value = self._normalize(self.value, params) 122 | 123 | def 
class NumberPattern(Property):
    """Property holding a number pattern, which has to be a string."""

    def evaluate(self, meta, params, default=None, line=None):
        """Return a NumberPattern; its value is set only when *meta* is a string."""
        pattern = NumberPattern()
        if not isinstance(meta, basestring):
            # issue a warning and hand back the property without a value
            logger.warning(line, 'value of number pattern property is not a string: ', meta)
            return pattern
        logger.debug(line, 'Number pattern property: ', meta)
        pattern.value = meta
        return pattern
class Array(Property):
    """Property whose metadata value has to be a list.

    ``arg`` is the object the list is delegated to for evaluation;
    ``warning_only`` selects an empty result instead of ``False`` when the
    value cannot be evaluated.
    """

    def __init__(self, arg, warning_only=False):
        Property.__init__(self)
        self.arg = arg
        self.warning_only = warning_only

    def evaluate(self, meta, params, default=None, line=None):
        """Evaluate *meta* by delegating the list to ``self.arg``.

        :return: a new Array carrying the evaluated items; a member of
            ``Commands`` if the delegate returned one; otherwise an Array with
            an empty value when ``warning_only`` is set, else ``False``
        """
        result = Array(self.arg, self.warning_only)
        if isinstance(meta, list):
            # if the arg is a operator, it should take a list as argument.
            # default and line are passed by keyword: the previous positional
            # call handed the line number over as the default value and
            # dropped both the real default and the line.
            result.value = self.arg.evaluate(meta, params, default=default, line=line)
            if result.value in Commands:
                return result.value
            if result.value:
                return result
        # error while parsing
        if self.warning_only:
            result.value = {}
            return result
        else:
            # the meta obj should be a list
            return False

    def normalize(self, params):
        """Normalize every item of the array."""
        for v in self.value:
            v.normalize(params)

    def merge(self, obj):
        """Merge the items of *obj* into the items at the same index."""
        if isinstance(obj.value, list):
            # TODO maybe wrong??
            for i, v in enumerate(obj.value):
                # items of obj beyond the length of this array are ignored
                if len(self.value) > i:
                    self.value[i].merge(v)

    def json(self):
        """Return the JSON form of every item as a list."""
        return [v.json() for v in self.value]
class Atomic(Property):
    """Property holding a single value.

    ``arg`` is either a ``MetaObject`` that validates the value, or a literal
    the value has to be equal to.
    """

    def __init__(self, arg):
        Property.__init__(self)
        self.arg = arg

    def evaluate(self, meta, params, default=None, line=None):
        """Evaluate *meta* against ``self.arg``.

        :return: a new Atomic carrying the value, ``Commands.Remove`` if the
            delegate asked for removal, or ``None`` if nothing matched
        """
        result = Atomic(self.arg)
        if isinstance(self.arg, MetaObject):
            # a predefined type or an operator.
            # default and line are passed by keyword so they reach the right
            # parameters even where a delegate orders them differently
            # (Or.evaluate takes warning_only before default).
            result.value = self.arg.evaluate(meta, params, default=default, line=line)
            if result.value == Commands.Remove:
                return Commands.Remove
            elif result.value is not None:
                return result
        else:
            # numbers, interpreted as integers or doubles
            # booleans, interpreted as booleans (true or false)
            # strings, interpreted as defined by the property
            # objects, interpreted as defined by the property
            # arrays, lists of numbers, booleans, strings, or objects
            # TODO
            if meta == self.arg:
                result.value = meta
                return result
        return None

    def json(self):
        """Return the value, converted via its own json() if it is a MetaObject."""
        if isinstance(self.value, MetaObject):
            return self.value.json()
        else:
            return self.value
class IsBuiltinDatatype(Operator):
    """Operator that accepts only the names of built-in datatypes."""

    def __init__(self, warning_only=False):
        self.warning_only = warning_only

    def evaluate(self, meta, params, default=None, line=None):
        """Return *meta* if it names a built-in datatype.

        Any other string yields ``Commands.Remove`` (with a warning) when
        ``warning_only`` is set; everything else yields ``False``.
        """
        if not isinstance(meta, basestring):
            #TODO logger.error(line, 'Value is not a built in datatype: ', meta)
            return False
        if built_in_datatypes.is_built_in_datatype(meta):
            return meta
        if self.warning_only:
            logger.warning(line, 'Value is not a built in datatype: ', meta)
            return Commands.Remove
        #TODO logger.error(line, 'Value is not a built in datatype: ', meta)
        return False
class Or(Operator):
    """Operator that collects the results of all alternatives that accept a value."""

    def __init__(self, *values):
        self.values = list(values)

    # NOTE(review): warning_only sits before default/line here, unlike the
    # other evaluate() signatures in this file; positional callers passing
    # (meta, params, default, line) therefore bind default to warning_only.
    def evaluate(self, meta, params, warning_only=False, default=None, line=None):
        """Evaluate *meta* against every alternative.

        :return: ``False`` if a BoolOperator alternative rejects the value,
            ``Commands.Remove`` if an alternative asks for removal, the single
            result if *meta* is not a list and exactly one result was
            collected, otherwise the list of collected results
        """
        collected = []
        for option in self.values:
            outcome = False
            if isinstance(option, BoolOperator) and not option.evaluate(meta, params, default, line):
                return False
            elif isinstance(option, MetaObject):
                outcome = option.evaluate(meta, params, default, line)
            elif option == meta:
                # a literal alternative that equals the value
                outcome = Atomic(option)
                outcome.value = option

            if outcome == Commands.Remove:
                return outcome
            if not outcome:
                continue
            if isinstance(outcome, list):
                collected.extend(outcome)
            else:
                collected.append(outcome)

        if warning_only and not collected:
            logger.warning(line, 'Value is not allowed', meta)
            collected = [default] if default else [Commands.Remove]

        # two types of or: on a list or a value
        single_value = not isinstance(meta, list)
        if single_value and len(collected) == 1:
            return collected[0]
        return collected
self.values: 481 | if isinstance(v, BoolOperator): 482 | if not v.evaluate(meta, params, default, line): 483 | return False 484 | else: 485 | prop = v.evaluate(meta, params, default, line) 486 | if not prop: 487 | return False 488 | if isinstance(prop, list): 489 | props += prop 490 | else: 491 | props.append(prop) 492 | return props 493 | 494 | 495 | class All(Operator): 496 | """ 497 | All operator 498 | Takes a Type in constructor. 499 | On evaluation, checks if all given items have the given type. 500 | """ 501 | def __init__(self, typ, warning_only=False): 502 | self.typ = typ 503 | self.warning_only = warning_only 504 | 505 | def evaluate(self, meta_list, params, default=None, line=None): 506 | props = [] 507 | warn = [] 508 | for meta in meta_list: 509 | prop = self.typ.evaluate(meta, params, default, line) 510 | if prop in Commands: 511 | return prop 512 | if not prop: 513 | if self.warning_only: 514 | warn.append(str(meta)) 515 | else: 516 | return False 517 | else: 518 | if isinstance(prop, list): 519 | props += prop 520 | else: 521 | props.append(prop) 522 | 523 | if props and warn: 524 | logger.warning(line, 'Any items that are not valid objects ' 525 | 'of the type expected are ignored: ', warn) 526 | return props 527 | 528 | 529 | class Some(Operator): 530 | """ 531 | Some operator 532 | Takes a Type in constructor. 533 | On evaluation, checks if some of given items have the given type. 
534 | """ 535 | def __init__(self, typ): 536 | self.typ = typ 537 | 538 | def evaluate(self, meta_list, params, default=None, line=None): 539 | props = [] 540 | valid = False 541 | for meta in meta_list: 542 | prop = self.typ.evaluate(meta, params, default, line) 543 | if prop: 544 | valid = True 545 | if isinstance(prop, list): 546 | props += prop 547 | else: 548 | props.append(prop) 549 | if valid: 550 | return props 551 | return False 552 | 553 | 554 | class Selection(Operator): 555 | def __init__(self, *values): 556 | self.values = list(values) 557 | 558 | def evaluate(self, meta, params, default=None, line=None): 559 | prop = False 560 | for v in self.values: 561 | tmp = v.evaluate(meta, params, default, line) 562 | if tmp and prop: 563 | # already the second match 564 | logger.debug(line, '(Selection Operator) Only one match allowed: ', meta) 565 | return False 566 | if tmp: 567 | prop = tmp 568 | # if we get here, we found zero or one match 569 | return prop 570 | 571 | class SetOrDefault(Operator): 572 | """ 573 | Used for tableDirection. If the given value is not in a predefined set than the default value is used. 574 | If no default value is provided for that property, it generates a warning 575 | and behave as if the property had not been specified. 576 | """ 577 | def __init__(self, *values): 578 | self.values = list(values) 579 | 580 | def evaluate(self, meta, params, default=None, line=None): 581 | prop = 'not given' 582 | for v in self.values: 583 | if v == meta: 584 | prop = meta 585 | break 586 | if prop == 'not given': 587 | if default: 588 | prop = default 589 | else: 590 | logger.warning(line, 'Unknown value (no default value is provided for that property): ', meta) 591 | prop = Commands.Remove 592 | return prop 593 | 594 | FORMAT = { 595 | 'decimalChar': { 596 | 'options': [], 597 | 'type': Atomic(OfType(basestring)), 598 | 'default': '.' 
599 | }, 600 | 'groupChar': { 601 | 'options': [], 602 | 'type': Atomic(OfType(basestring)), 603 | 'default': ',' 604 | }, 605 | 'pattern': { 606 | 'options': [], 607 | 'type': NumberPattern() 608 | }, 609 | } 610 | 611 | DATATYPE = { 612 | 'base': { 613 | 'options': [], 614 | 'type': Atomic(OfType(basestring)), 615 | 'default': 'string' 616 | }, 617 | 'format': { 618 | # TODO object property 619 | 'options': [], 620 | 'type': Atomic(Or(OfType(basestring), Object(FORMAT))) 621 | }, 622 | 'length': { 623 | 'options': [], 624 | 'type': Atomic(OfType(int)) 625 | }, 626 | 'minLength': { 627 | 'options': [], 628 | 'type': Atomic(OfType(int)) 629 | } 630 | # TODO datatype description object 631 | } 632 | 633 | INHERITED = { 634 | 'aboutUrl': { 635 | 'options': [], 636 | 'type': Uri() 637 | }, 638 | 'datatype': { 639 | 'options': [], 640 | 'type': Atomic(Or(IsBuiltinDatatype(warning_only=True), Object(DATATYPE))) 641 | }, 642 | 'default': { 643 | 'options': [], 644 | 'type': Atomic(OfType(basestring, warning_only=True)), 645 | 'default': '' 646 | }, 647 | 'lang': { 648 | 'options': [], 649 | 'type': Atomic(OfType(basestring)), 650 | 'default': 'und' 651 | }, 652 | 'null': { 653 | 'options': [], 654 | 'type': Atomic(OfType(basestring, warning_only=True)), 655 | 'default': '' 656 | }, 657 | 'ordered': { 658 | 'options': [], 659 | 'type': Atomic(OfType(bool, warning_only=True)), 660 | 'default': False 661 | }, 662 | 'propertyUrl': { 663 | 'options': [], 664 | 'type': Uri() 665 | }, 666 | 'required': { 667 | 'options': [], 668 | 'type': Atomic(OfType(bool)), 669 | 'default': False 670 | }, 671 | 'separator': { 672 | 'options': [], 673 | 'type': Atomic(OfType(basestring, warning_only=True)), 674 | 'default': None 675 | }, 676 | 'textDirection': { 677 | 'options': [], 678 | 'type': Atomic(SetOrDefault('ltr', 'rtl')), 679 | 'default': 'ltr' 680 | }, 681 | 'valueUrl': { 682 | 'options': [], 683 | 'type': Uri() 684 | } 685 | } 686 | 687 | COLUMN = { 688 | 'name': { 689 | 
'options': [], 690 | 'type': Atomic(OfType(basestring, warning_only=True)) 691 | }, 692 | 'suppressOutput': { 693 | 'options': [], 694 | 'type': Atomic(OfType(bool, warning_only=True)), 695 | 'default': False 696 | }, 697 | 'titles': { 698 | 'options': [], 699 | 'type': NaturalLanguage() 700 | }, 701 | 'virtual': { 702 | 'options': [], 703 | 'type': Atomic(OfType(bool, warning_only=True)), 704 | 'default': False 705 | }, 706 | '@id': { 707 | 'options': [], 708 | 'type': Link('@id') 709 | }, 710 | '@type': { 711 | 'options': [], 712 | 'type': Atomic('Column') 713 | } 714 | } 715 | 716 | FOREIGN_KEY = { 717 | 'columnReference': { 718 | 'options': [Option.Required], 719 | 'type': ColumnReference() 720 | }, 721 | 'reference': { 722 | 'options': [], 723 | 'type': Object({ 724 | 'resource': { 725 | 'options': [], 726 | 'type': Link('resources') 727 | }, 728 | 'schemaReference': { 729 | 'options': [], 730 | 'type': Link('schemaReference') 731 | }, 732 | 'columnReference': { 733 | 'options': [Option.Required], 734 | 'type': ColumnReference() 735 | } 736 | }) 737 | } 738 | } 739 | 740 | SCHEMA = { 741 | 'foreignKeys': { 742 | 'options': [], 743 | 'type': Array(All(Object(FOREIGN_KEY))) 744 | }, 745 | 'columns': { 746 | 'options': [], 747 | 'type': Array(And(All(Object(COLUMN, inherited_obj=INHERITED, common_properties=True)), 748 | AllDiff('name'))) 749 | }, 750 | 'primaryKey': { 751 | 'options': [], 752 | 'type': ColumnReference() 753 | }, 754 | '@id': { 755 | 'options': [], 756 | 'type': Link('@id') 757 | }, 758 | '@type': { 759 | 'options': [], 760 | 'type': Atomic('Schema') 761 | } 762 | } 763 | 764 | DIALECT = { 765 | 'encoding': { 766 | 'options': [], 767 | 'type': Atomic(OfType(basestring)), 768 | 'default': 'utf-8' 769 | }, 770 | 'lineTerminators': { 771 | 'options': [], 772 | 'type': Atomic(OfType(list, warning_only=True)), 773 | 'default': ["\r\n", "\n"] 774 | }, 775 | 'quoteChar': { 776 | 'options': [], 777 | 'type': Atomic(Or(OfType(basestring), None)), 778 | 
'default': '"' 779 | }, 780 | 'doubleQuote': { 781 | 'options': [], 782 | 'type': Atomic(OfType(bool, warning_only=True)), 783 | 'default': True 784 | }, 785 | 'skipRows': { 786 | 'options': [], 787 | 'type': Atomic(OfType(int, warning_only=True)), 788 | 'default': 0 789 | }, 790 | 'commentPrefix': { 791 | 'options': [], 792 | 'type': Atomic(OfType(basestring, warning_only=True)), 793 | 'default': '#' 794 | }, 795 | 'header': { 796 | 'options': [], 797 | 'type': Atomic(OfType(bool, warning_only=True)), 798 | 'default': True 799 | }, 800 | 'headerRowCount': { 801 | 'options': [], 802 | 'type': Atomic(OfType(int, warning_only=True)), 803 | 'default': 1 804 | }, 805 | 'delimiter': { 806 | 'options': [], 807 | 'type': Atomic(OfType(basestring, warning_only=True)), 808 | 'default': ',' 809 | }, 810 | 'skipColumns': { 811 | 'options': [], 812 | 'type': Atomic(OfType(int, warning_only=True)), 813 | 'default': 0 814 | }, 815 | 'skipBlankRows': { 816 | 'options': [], 817 | 'type': Atomic(OfType(bool, warning_only=True)), 818 | 'default': False 819 | }, 820 | 'skipInitialSpace': { 821 | 'options': [], 822 | 'type': Atomic(OfType(bool, warning_only=True)), 823 | 'default': False 824 | }, 825 | 'trim': { 826 | 'options': [], 827 | 'type': Atomic(SetOrDefault(True, False, 'start', 'end')), 828 | 'default': 'false' 829 | }, 830 | '@id': { 831 | 'options': [], 832 | 'type': Link('@id') 833 | }, 834 | '@type': { 835 | 'options': [], 836 | 'type': Atomic('Dialect') 837 | } 838 | } 839 | 840 | 841 | 842 | TRANSFORMATION = { 843 | 'url': { 844 | 'options': [Option.Required], 845 | 'type': Link('url') 846 | }, 847 | 'targetFormat': { 848 | 'options': [Option.Required], 849 | 'type': Link('targetFormat') 850 | }, 851 | 'scriptFormat': { 852 | 'options': [Option.Required], 853 | 'type': Link('scriptFormat') 854 | }, 855 | 'titles': { 856 | 'options': [], 857 | 'type': NaturalLanguage() 858 | }, 859 | 'source': { 860 | 'options': [], 861 | 'type': Atomic(OfType(basestring)) 862 | }, 863 
| '@id': { 864 | 'options': [], 865 | 'type': Link('@id') 866 | }, 867 | '@type': { 868 | 'options': [], 869 | 'type': Atomic('Template') 870 | } 871 | } 872 | 873 | 874 | CONTEXT = Selection(Atomic('http://www.w3.org/ns/csvw'), 875 | Array(And(Some(Atomic('http://www.w3.org/ns/csvw')), 876 | Some(Or(Atomic(Base()), Atomic(Language())))) 877 | ) 878 | ) 879 | 880 | TABLE = { 881 | 'url': { 882 | 'options': [Option.Required], 883 | 'type': Link('url') 884 | }, 885 | 'transformations': { 886 | 'options': [], 887 | 'type': Array(All(Object(TRANSFORMATION), warning_only=True), warning_only=True) 888 | }, 889 | 'tableDirection': { 890 | 'options': [], 891 | 'type': Atomic(SetOrDefault('rtl', 'ltr', 'default')), 892 | 'default': 'default' 893 | }, 894 | 'tableSchema': { 895 | 'options': [], 896 | 'type': Object(SCHEMA, inherited_obj=INHERITED, common_properties=True) 897 | }, 898 | 'dialect': { 899 | 'options': [], 900 | 'type': Object(DIALECT, warning_only=True) 901 | }, 902 | 'notes': { 903 | 'options': [], 904 | 'type': Array(All(Object({}, common_properties=True))) 905 | }, 906 | 'suppressOutput': { 907 | 'options': [], 908 | 'type': Atomic(OfType(bool, warning_only=True)), 909 | 'default': False 910 | }, 911 | '@id': { 912 | 'options': [], 913 | 'type': Link('@id') 914 | }, 915 | '@type': { 916 | 'options': [], 917 | 'type': Atomic('Table') 918 | }, 919 | '@context': { 920 | 'options': [], 921 | 'type': CONTEXT 922 | } 923 | } 924 | 925 | 926 | TABLE_GROUP = { 927 | '@context': { 928 | 'options': [Option.Required], 929 | 'type': CONTEXT 930 | }, 931 | 'tables': { 932 | 'options': [Option.Required, Option.NonEmpty], 933 | 'type': Array(All(Object(TABLE, inherited_obj=INHERITED, common_properties=True), warning_only=True)) 934 | }, 935 | 'transformations': { 936 | 'options': [], 937 | 'type': Array(All(Object(TRANSFORMATION))) 938 | }, 939 | 'tableDirection': { 940 | 'options': [], 941 | 'type': Atomic(SetOrDefault('rtl', 'ltr', 'default')), 942 | 'default': 'default' 
def _validate(line, meta, params, schema, common_properties):
    """Validate the properties of one description object against ``schema``.

    :param line: location information handed to the logger calls
    :param meta: dict mapping property names to their raw values
    :param params: passed through unchanged to every ``evaluate`` call
    :param schema: dict mapping property names to a spec with the keys
        ``'options'``, ``'type'`` and optionally ``'default'``
    :param common_properties: whether common properties are accepted
    :return: dict of evaluated properties; ``False`` if validation fails;
        ``Commands.Error`` if a property evaluation reports it
    """
    model = {}
    for prop in meta:
        value = meta[prop]
        # Reset per property: a default must never leak from one property
        # into the evaluation of the next one.
        default = None
        if prop in schema:
            opts = schema[prop]['options']
            t = schema[prop]['type']
            # check for default value
            if 'default' in schema[prop]:
                default = schema[prop]['default']
            # check if not empty
            if value is not None:
                prop_eval = t.evaluate(value, params, default, line)
                if prop_eval == Commands.Remove:
                    # the property only produced a warning: leave it out
                    continue
                if prop_eval == Commands.Error:
                    return prop_eval
                if not prop_eval:
                    return False
                model[prop] = prop_eval
            elif Option.NonEmpty in opts:
                logger.debug(line, 'Property is empty: ', prop)
                if prop == 'tables':
                    logger.error(line, 'array does not contain one or more "table descriptions"')
                return False
        elif common_properties and is_common_property(prop):
            prop_eval = Common(prop).evaluate(value, params, default, line)
            if not prop_eval:
                return False
            model[prop] = prop_eval
        else:
            logger.warning(line, 'Unknown property: ', prop)
            # NOTE(review): stores an Atomic built from the property *name*,
            # not from its value -- confirm this is intended.
            model[prop] = Atomic(prop)
    # check for missing props
    for prop in schema:
        if Option.Required in schema[prop]['options'] and prop not in meta:
            logger.error(line, 'Property missing: ', prop)
            return False
    return model


def validate(metadata):
    """Validate a metadata document and wrap the result in a ``Model``.

    A bare table description is first turned into a table group by
    ``expand``, so only the table-group schema is evaluated here.

    :return: a ``Model`` on success, ``False`` otherwise
    """
    metadata = expand(metadata)
    outer_group = Object(TABLE_GROUP, inherited_obj=INHERITED, common_properties=True)
    params = {}
    validated = outer_group.evaluate(metadata, params)
    # TODO look for language, column references, ...
    if not validated or validated == Commands.Error:
        return False
    return Model(validated, params)


def expand(meta):
    """Turn a table description into a table group description.

    A document that already has a ``tables`` property is returned as is.
    Otherwise the document becomes the only entry of ``tables`` and its
    ``@context`` (if any) moves to the new group. The caller's dict is not
    modified; a shallow copy is wrapped instead.
    """
    if 'tables' in meta:
        return meta
    table = dict(meta)
    context = table.pop('@context', None)
    group = {'tables': [table]}
    if context:
        group['@context'] = context
    return group


class Model:
    """Validated metadata object together with the parameters collected
    while validating it."""

    def __init__(self, obj, params):
        self.params = params
        self.object = obj

    def normalize(self):
        """Normalize the wrapped object using the stored parameters."""
        self.object.normalize(self.params)

    def merge(self, B):
        """Merge the object wrapped by model ``B`` into this one."""
        self.object.merge(B.object)

    def json(self):
        """Return the JSON representation of the wrapped object."""
        return self.object.json()


def normalize(metadata):
    """Validate ``metadata`` and normalize the resulting model.

    Returns the normalized model, or the falsy result of ``validate`` when
    validation fails. The normalization rules are:

    1) If the property is a common property or notes the value must be
       normalized as follows:
       1.1) If the value is an array, each value within the array is
            normalized in place as described here.
       1.2) If the value is a string, replace it with an object with a @value
            property whose value is that string. If a default language is
            specified, add a @language property whose value is that default
            language.
       1.3) If the value is an object with a @value property, it remains as is.
       1.4) If the value is any other object, normalize each property of that
            object as follows:
            1.4.1) If the property is @id, expand any prefixed names and
                   resolve its value against the base URL.
            1.4.2) If the property is @type, then its value remains as is.
            1.4.3) Otherwise, normalize the value of the property as if it
                   were a common property, according to this algorithm.
       1.5) Otherwise, the value remains as is.
    2) If the property is an array property each element of the value is
       normalized using this algorithm.
    3) If the property is a link property the value is turned into an
       absolute URL using the base URL.
    4) If the property is an object property with a string value, the string
       is a URL referencing a JSON document containing a single object. Fetch
       this URL to retrieve an object, which may have a local @context. Raise
       an error if fetching this URL does not result in a JSON object.
       Normalize each property in the resulting object recursively using this
       algorithm and with its local @context then remove the local @context
       property. If the resulting object does not have an @id property, add
       an @id whose value is the original URL. This object becomes the value
       of the original object property.
    5) If the property is an object property with an object value, normalize
       each property recursively using this algorithm.
    6) If the property is a natural language property and the value is not
       already an object, it is turned into an object whose properties are
       language codes and where the values of those properties are arrays.
       The suitable language code for the values is determined through the
       default language; if it can't be determined the language code und
       must be used.
    7) If the property is an atomic property that can be a string or an
       object, normalize to the object form as described for that property.

    Following this normalization process, the @base and @language properties
    within the @context are no longer relevant; the normalized metadata can
    have its @context set to http://www.w3.org/ns/csvw.
    """
    model = validate(metadata)
    if model:
        model.normalize()
    return model


def merge(meta_sources):
    """Merge metadata documents, ordered from highest to lowest priority.

    Every source is validated and normalized first; the remaining sources
    are then merged one after another into the first one.

    :return: the merged model, or ``None`` when ``meta_sources`` is empty
    :raises ValidationException: if any source fails validation
    """
    # at first normalize (and validate) the metadata objects
    norm_sources = []
    for s in meta_sources:
        norm = normalize(s)
        if not norm:
            raise ValidationException('validation failed for metadata: ' + str(s))
        norm_sources.append(norm)

    # then merge them into one object
    merged = None
    for model in norm_sources:
        if merged is None:
            # first iteration: the highest-priority source is the base
            merged = model
        else:
            merged.merge(model)
    return merged
# directory-specific metadata in a document located based on the location of the tabular data file
DIRECTORY_METADATA = ['metadata.json', 'csv-metadata.json']


def parse_to_json(metadata_handle):
    """Parse the JSON document readable from ``metadata_handle``."""
    meta_json = simplejson.load(metadata_handle)
    # meta = metadata.validate(meta_json)
    return meta_json


def _parse_header_field(header_field):
    """Turn a ``Link`` header field into metadata (not implemented yet)."""
    raise NotImplementedError()


def _fetch_metadata(meta_sources, meta_url, found_message):
    """Download ``meta_url`` and append its parsed JSON to ``meta_sources``.

    :return: True if the document was answered with HTTP 200 and appended,
        False if the URL could not be opened or another status was returned
    """
    try:
        response = urllib2.urlopen(meta_url)
    except urllib2.URLError:
        return False
    try:
        if response.getcode() != 200:
            return False
        logger.debug(found_message, meta_url)
        meta_sources.append(parse_to_json(response))
        return True
    finally:
        # always release the connection, also when parsing fails
        response.close()


def metadata_extraction(url, metadata_handle, embedded_metadata=False):
    """Collect the metadata documents that apply to a tabular data file.

    Sources are appended in this order:

    1. metadata read from ``metadata_handle``
    2. ``embedded_metadata``
    3. metadata referenced by a ``Link`` header of ``url``
    4. file-specific metadata at ``url`` + ``FILE_SPECIFIC_METADATA``
    5. the first directory-specific document found next to ``url``

    Cases 3 to 5 are skipped when ``url`` is empty. URLs that cannot be
    opened are skipped.

    :return: list of metadata objects
    """
    meta_sources = []

    # case 1
    if metadata_handle is not None:
        meta_sources.append(parse_to_json(metadata_handle))

    # case 2
    if embedded_metadata:
        meta_sources.append(embedded_metadata)

    if not url:
        return meta_sources

    # case 3
    try:
        response = urllib2.urlopen(url)
    except urllib2.URLError:
        response = None
    if response is not None:
        try:
            header = response.info()
            if header is not None:
                for link in HEADER_LINK:
                    if link in header:
                        header_field = header[link]
                        logger.debug('found link in http header: %s', header_field)
                        try:
                            meta_sources.append(_parse_header_field(header_field))
                        except NotImplementedError:
                            # link headers are not supported yet; do not let
                            # that abort the remaining lookups
                            logger.warning('ignoring unsupported link header: %s', header_field)
                        # both spellings may address the same header field
                        # (presumably a case-insensitive lookup); handle it once
                        break
        finally:
            response.close()

    # case 4
    _fetch_metadata(meta_sources, url + FILE_SPECIFIC_METADATA,
                    'found file specific metadata: %s')

    # case 5
    if '/' in url:
        # split away the part after the last slash; URLs are always joined
        # with '/', independent of the local path separator
        directory = url.rsplit('/', 1)[0]
        for dir_meta in DIRECTORY_METADATA:
            if _fetch_metadata(meta_sources, directory + '/' + dir_meta,
                               'found directory specific metadata: %s'):
                break

    return meta_sources
/pycsvw/parser_exceptions.py: -------------------------------------------------------------------------------- 1 | __author__ = 'sebastian' 2 | 3 | 4 | class ValidationException(Exception): 5 | pass 6 | 7 | class ParserException(Exception): 8 | pass -------------------------------------------------------------------------------- /pycsvw/test.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pprint 3 | from cStringIO import StringIO 4 | from pycsvw import CSVW 5 | 6 | 7 | def test(): 8 | 9 | t1 = 'GID,On Street,Species,Trim Cycle,Diameter at Breast Ht,Inventory Date,Comments,Protected,KML\n' \ 10 | '1,ADDISON AV,Celtis australis,Large Tree Routine Prune,11,10/18/2010,,,"-122.156485,37.440963"\n' \ 11 | '2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,11,6/2/2010,,,"-122.156749,37.440958"\n' \ 12 | '6,ADDISON AV,Robinia pseudoacacia,Large Tree Routine Prune,29,6/1/2010,cavity or decay; trunk decay; codominant leaders; included bark; large leader or limb decay; previous failure root damage; root decay; beware of BEES,YES,"-122.156299,37.441151"' 13 | 14 | m1_dict = { 15 | "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}], 16 | "@id": "http://example.org/tree-ops-ext", 17 | "url": "tree-ops-ext.csv", 18 | "dc:title": "Tree Operations", 19 | "dcat:keyword": ["tree", "street", "maintenance"], 20 | "dc:publisher": [{ 21 | "schema:name": "Example Municipality", 22 | "schema:url": {"@id": "http://example.org"} 23 | }], 24 | "dc:license": {"@id": "http://opendefinition.org/licenses/cc-by/"}, 25 | "dc:modified": {"@value": "2010-12-31", "@type": "xsd:date"}, 26 | "notes": [{ 27 | "@type": "oa:Annotation", 28 | "oa:hasTarget": {"@id": "http://example.org/tree-ops-ext"}, 29 | "oa:hasBody": { 30 | "@type": "oa:EmbeddedContent", 31 | "rdf:value": "This is a very interesting comment about the table; it's a table!", 32 | "dc:format": {"@value": "text/plain"} 33 | } 34 | }], 35 | "dialect": 
{"trim": True}, 36 | "tableSchema": { 37 | "columns": [{ 38 | "name": "GID", 39 | "titles": [ 40 | "GID", 41 | "Generic Identifier" 42 | ], 43 | "dc:description": "An identifier for the operation on a tree.", 44 | "datatype": "string", 45 | "required": True, 46 | "suppressOutput": True 47 | }, { 48 | "name": "on_street", 49 | "titles": "On Street", 50 | "dc:description": "The street that the tree is on.", 51 | "datatype": "string" 52 | }, { 53 | "name": "species", 54 | "titles": "Species", 55 | "dc:description": "The species of the tree.", 56 | "datatype": "string" 57 | }, { 58 | "name": "trim_cycle", 59 | "titles": "Trim Cycle", 60 | "dc:description": "The operation performed on the tree.", 61 | "datatype": "string", 62 | "lang": "en" 63 | }, { 64 | "name": "dbh", 65 | "titles": "Diameter at Breast Ht", 66 | "dc:description": "Diameter at Breast Height (DBH) of the tree (in feet), measured 4.5ft above ground.", 67 | "datatype": "integer" 68 | }, { 69 | "name": "inventory_date", 70 | "titles": "Inventory Date", 71 | "dc:description": "The date of the operation that was performed.", 72 | "datatype": {"base": "date", "format": "M/d/yyyy"} 73 | }, { 74 | "name": "comments", 75 | "titles": "Comments", 76 | "dc:description": "Supplementary comments relating to the operation or tree.", 77 | "datatype": "string", 78 | "separator": ";" 79 | }, { 80 | "name": "protected", 81 | "titles": "Protected", 82 | "dc:description": "Indication (YES / NO) whether the tree is subject to a protection order.", 83 | "datatype": {"base": "boolean", "format": "YES|NO"}, 84 | "default": "NO" 85 | }, { 86 | "name": "kml", 87 | "titles": "KML", 88 | "dc:description": "KML-encoded description of tree location.", 89 | "datatype": "xml" 90 | }], 91 | "primaryKey": "GID", 92 | "aboutUrl": "http://example.org/tree-ops-ext#gid-{GID}" 93 | } 94 | } 95 | m1 = StringIO(json.dumps(m1_dict)) 96 | f = StringIO(t1) 97 | csvw = CSVW(handle=f, metadata_handle=m1, url='http://example.org/tree-ops-ext.csv') 98 
| for col in csvw.table.columns: 99 | pprint.pprint(col.name) 100 | pprint.pprint(col.titles) 101 | pprint.pprint(col.cells) 102 | for c in col.cells: 103 | pprint.pprint(c.value) 104 | pprint.pprint(csvw.table.rows) 105 | 106 | pprint.pprint(csvw.metadata.json()) 107 | 108 | csvw.to_json() 109 | 110 | 111 | 112 | if __name__ == '__main__': 113 | test() -------------------------------------------------------------------------------- /pycsvw/testdata/csvw-template/LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 
27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. 
other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. 
In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /pycsvw/testdata/csvw-template/README.md: -------------------------------------------------------------------------------- 1 | This is a simple example of using [CSV on the Web] to document the semantics 2 | of a CSV file. Fork it, and change it for your CSV data. With apologies 3 | to [Dan Bricklin]. 4 | 5 | [CSV on the Web]: http://www.w3.org/2013/csvw/wiki/Main_Page 6 | [Dan Bricklin]: https://en.wikipedia.org/wiki/Dan_Bricklin 7 | -------------------------------------------------------------------------------- /pycsvw/testdata/csvw-template/example.csv: -------------------------------------------------------------------------------- 1 | isbn,title,author 2 | "0470402377","Bricklin on Technology","Dan Bricklin" 3 | -------------------------------------------------------------------------------- /pycsvw/testdata/csvw-template/example.csv-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": { 3 | "@vocab": "http://www.w3.org/ns/csvw#", 4 | "dc": "http://purl.org/dc/terms/" 5 | }, 6 | "@type": "Table", 7 | "url": "example.csv", 8 | "dc:creator": "Dan Bricklin", 9 | "dc:title": "My Spreadsheet", 10 | "dc:modified": "2014-05-09T15:44:58Z", 11 | "dc:publisher": "My Books", 12 | "tableSchema": { 13 | "aboutUrl": "http://librarything.com/isbn/{isbn}", 14 | 
"primaryKey": "isbn", 15 | "columns": [ 16 | { 17 | "name": "isbn", 18 | "titles": "ISBN-10", 19 | "datatype": "string", 20 | "unique": true, 21 | "propertyUrl": "http://purl.org/dc/terms/identifier" 22 | }, 23 | { 24 | "name": "title", 25 | "titles": "Book Title", 26 | "datatype": "string", 27 | "propertyUrl": "http://purl.org/dc/terms/title" 28 | }, 29 | { 30 | "name": "author", 31 | "titles": "Book Author", 32 | "datatype": "string", 33 | "propertyUrl": "http://purl.org/dc/terms/creator" 34 | } 35 | ] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /pycsvw/testdata/test124-user-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "http://www.w3.org/ns/csvw", 3 | "rdfs:comment": "If not validating, and one schema has a name property but not a titles property, and the other has a titles property but not a name property.", 4 | "url": "tree-ops.csv", 5 | "tableSchema": { 6 | "columns": [ 7 | {"name": "GID1"}, 8 | {"name": "on_street1"}, 9 | {"name": "species1"}, 10 | {"name": "trim_cycle1"}, 11 | {"name": "inventory_date1"} 12 | ] 13 | } 14 | } -------------------------------------------------------------------------------- /pycsvw/testdata/test125-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "http://www.w3.org/ns/csvw", 3 | "rdfs:comment": "If the column required annotation is true, add an error to the list of errors for the cell.", 4 | "tables": [{ 5 | "url": "test125.csv", 6 | "tableSchema": { 7 | "columns": [{ 8 | "name": "countryCode", 9 | "titles": "countryCode", 10 | "required": true 11 | }, { 12 | "name": "latitude", 13 | "titles": "latitude", 14 | "datatype": "number", 15 | "required": true 16 | }, { 17 | "name": "longitude", 18 | "titles": "longitude", 19 | "datatype": "number", 20 | "required": true 21 | }, { 22 | "name": "name", 23 | "titles": "name", 24 | "datatype": 
"string", 25 | "required": true 26 | }] 27 | } 28 | }] 29 | } 30 | -------------------------------------------------------------------------------- /pycsvw/testdata/test125.csv: -------------------------------------------------------------------------------- 1 | countryCode,latitude,longitude,name 2 | AD,42.546245,1.601554,Andorra 3 | AE,23.424076,53.847818,"United Arab Emirates" 4 | AF,,67.709953,Afghanistan 5 | -------------------------------------------------------------------------------- /pycsvw/testdata/test234-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "http://www.w3.org/ns/csvw", 3 | "rdfs:comment": "Validators MUST raise errors if there is more than one row with the same primary key.", 4 | "rdfs:label": "multiple column primaryKey violation", 5 | "url": "test234.csv", 6 | "tableSchema": { 7 | "columns": [ 8 | {"name": "PK1", "titles": "PK1"}, 9 | {"name": "PK2", "titles": "PK2"} 10 | ], 11 | "primaryKey": ["PK1", "PK2"] 12 | } 13 | } -------------------------------------------------------------------------------- /pycsvw/testdata/test234.csv: -------------------------------------------------------------------------------- 1 | PK1,PK2 2 | foo,bar 3 | foo,bar 4 | -------------------------------------------------------------------------------- /pycsvw/testdata/tree-ops.csv: -------------------------------------------------------------------------------- 1 | GID,On Street,Species,Trim Cycle,Inventory Date 2 | 1,ADDISON AV,Celtis australis,Large Tree Routine Prune,10/18/2010 3 | 2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010 4 | -------------------------------------------------------------------------------- /pycsvw/testdata/validate-result-missing-column.txt: -------------------------------------------------------------------------------- 1 | Column: GID1 defined in schema, but not found in csv table! 
def validate_file(csv_file_path, schema_file_path):
    """Validate a CSV file on disk against a CSVW metadata (schema) file.

    Both files are opened in binary mode and handed to ``validate_handle``
    together with the CSV's base file name, which ``validate_handle`` uses
    to pick the matching entry out of a ``"tables"`` list.

    :param csv_file_path: path of the CSV file to validate.
    :param schema_file_path: path of the JSON metadata file.
    :return: whatever ``validate_handle`` returns, i.e. a
        ``(valid, error_message)`` tuple.
    """
    # ntpath.basename splits on both "\\" and "/", so the file name is
    # extracted correctly for Windows-style and POSIX-style paths alike.
    csv_file_name = ntpath.basename(csv_file_path)
    # Context managers guarantee both handles are closed, also when parsing
    # or validation raises (the previous version leaked them).
    with open(csv_file_path, 'rb') as csv_handle:
        with open(schema_file_path, 'rb') as schema_handle:
            return validate_handle(csv_handle, csv_file_name, schema_handle)
def _titles_as_list(titles):
    """Normalize a column ``titles`` value to a flat list of titles.

    ``titles`` may be a single string, a list of strings, or a mapping from
    language code to a string / list of strings.
    """
    if isinstance(titles, dict):
        flat = []
        for value in titles.values():
            if isinstance(value, (list, tuple)):
                flat.extend(value)
            else:
                flat.append(value)
        return flat
    if isinstance(titles, (list, tuple)):
        return list(titles)
    return [titles]


def validate_columns_name(embedded_schema, schema):
    """Check the schema's column names against the columns found in the CSV.

    Columns are compared position by position: the ``name`` of the i-th
    schema column must be one of the ``titles`` of the i-th CSV column.
    Schema columns without a ``name`` are not checked.

    :param embedded_schema: metadata extracted from the CSV itself; must
        provide ``["tableSchema"]["columns"]``, each with a ``titles`` entry.
    :param schema: user supplied table description; must provide
        ``["tableSchema"]["columns"]``.
    :return: ``(valid, error_message)``; ``error_message`` holds one line per
        problem and is empty when ``valid`` is True.
    """
    columns_in_table = embedded_schema["tableSchema"]["columns"]
    columns_in_schema = schema["tableSchema"]["columns"]

    if len(columns_in_schema) != len(columns_in_table):
        # Positional comparison is meaningless when the counts differ.
        error_message = "Column number mismatch! Csv has %s columns, but schema has %s columns.\n" % (len(columns_in_table), len(columns_in_schema))
        return (False, error_message)

    errors = []
    for column, table_column in zip(columns_in_schema, columns_in_table):
        if "name" not in column:
            continue
        # Normalizing first avoids an accidental substring match when
        # ``titles`` is a plain string (e.g. "GID" in "GID1") and a
        # key-only match when it is a language map.
        if column["name"] not in _titles_as_list(table_column["titles"]):
            errors.append("Column: %s defined in schema, but not found in csv table!\n" % column["name"])

    return (not errors, "".join(errors))
def get_column_index(columns_in_schema, pk_column_list):
    """Return the 1-based positions of the schema columns that form the key.

    :param columns_in_schema: list of column description dicts, in table
        order.
    :param pk_column_list: names of the primary key columns.
    :return: list of positions (first column is 1) of every column whose
        ``name`` appears in ``pk_column_list``, in table order.
    """
    # Positions start at 1 because concatenate_pk_value compares them with
    # cell.column.number.
    # Columns that carry only "titles" and no "name" can never be part of
    # the key; skip them instead of failing with a KeyError.
    return [
        position
        for position, column in enumerate(columns_in_schema, 1)
        if "name" in column and column["name"] in pk_column_list
    ]
class ImplementationReport(object):
    """Build an EARL implementation report on top of the project's DOAP file.

    The DOAP/Turtle description is loaded into an rdflib graph; the first
    ``foaf:Person`` found becomes the assertor and the first
    ``earl:TestSubject`` the subject of every assertion added later.
    """

    def __init__(self, doap_path='test/doap.ttl'):
        """Load the project description.

        :param doap_path: location of the Turtle file. The default is a
            relative path, so it is resolved against the current working
            directory.
        :raises ValueError: if the file declares no ``foaf:Person`` or no
            ``earl:TestSubject``.
        """
        self.g = rdflib.Graph()
        self.g.parse(location=doap_path, format='turtle')
        self.assertor = self._first_of_type(FOAF.Person)
        self.subject = self._first_of_type(EARL.TestSubject)

    def _first_of_type(self, rdf_type):
        """Return the first resource in the graph typed as ``rdf_type``."""
        for node in self.g.subjects(rdflib.RDF.type, rdf_type):
            return node
        # Fail here with a clear message instead of leaving the attribute
        # unset and crashing later with an AttributeError.
        raise ValueError('no resource of type %s found in the project description' % rdf_type)

    def run_validation_test(self):
        """Run the validation test cases and add their results to the graph."""
        implementation_report(self.g, self.subject, self.assertor)

    def getResult(self):
        """Return the report graph serialized as Turtle."""
        return self.g.serialize(format='turtle')
def test_generator(csv_file, result_url, implicit, type, option):
    """Create a unittest method for one entry of the CSVW JSON test manifest.

    :param csv_file: URL of the CSV file (the test's action).
    :param result_url: URL of the expected JSON output, or None.
    :param implicit: URLs of implicitly used files (currently unused).
    :param type: manifest test type; a key of ``TYPES`` telling whether the
        conversion is expected to succeed.
    :param option: manifest options; ``option['metadata']`` is passed on as
        the user metadata URL when present.
    :return: a function taking the TestCase instance as its only argument.
    """
    def test(self):
        metadata = option.get('metadata')

        try:
            csvw = CSVW(csv_file, metadata_url=metadata)
        except Exception:
            # Parsing failed: that is only acceptable for a negative test.
            if TYPES[type]:
                traceback.print_exc()
            self.assertFalse(TYPES[type],
                             'parsing failed for a positive test, see traceback above')
            return

        # if we get here this should be a positive test
        self.assertTrue(TYPES[type])

        # if we can parse it we should at least produce some embedded metadata
        self.assertNotEqual(csvw.metadata, None)
        # and the expected result should exist
        self.assertNotEqual(result_url, None)

        # compare the generated JSON with the expected result
        resp = urllib2.urlopen(result_url)
        try:
            result = json.loads(resp.read())
        finally:
            # urllib2 responses are not context managers; close explicitly
            # so the connection is not leaked for every test.
            resp.close()
        self.assertEqual(csvw.to_json(), result)

    return test
if __name__ == '__main__':
    # Run exactly one test of the validation manifest, chosen interactively
    # by its number.
    #
    # On Python 2 the built-in input() *evaluates* what is typed: "046"
    # would be read as the octal literal 38 and arbitrary expressions would
    # be executed. Read the raw text instead.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    test_no = read_line('Test No.: ').strip()
    # Manifest ids end in "#testNNN" with a zero padded number.
    test_id = '#test' + test_no.zfill(3)
    manifest = get_manifest()
    for t in manifest['entries']:
        if not t['id'].endswith(test_id):
            continue

        test_name = ' '.join(['test', t['id'], t['type'], t['name']])
        # All URLs in the manifest are relative to BASE.
        action_url = urlparse.urljoin(BASE, t['action'])
        implicit = [urlparse.urljoin(BASE, f) for f in t.get('implicit', [])]

        if 'metadata' in t['option']:
            t['option']['metadata'] = urlparse.urljoin(BASE, t['option']['metadata'])

        # A CSV action is validated directly; a JSON action is a metadata
        # file whose CSV (if any) is listed among the implicit files.
        if action_url.endswith('.csv'):
            test = test_generator(action_url, implicit, t['type'], t['option'])
            setattr(CSVWValidationTestCases, test_name, test)
        elif action_url.endswith('.json'):
            test = test_generator_metadata(action_url, implicit, t['type'], t['option'])
            setattr(CSVWValidationTestCases, test_name, test)
        break

    unittest.main()
def test_generator(csv_url, implicit, type, option):
    """Create a unittest method for a manifest entry whose action is a CSV.

    :param csv_url: URL of the CSV file to validate.
    :param implicit: URLs of implicitly used files (currently unused).
    :param type: manifest test type; a key of ``TYPES`` telling whether
        validation is expected to succeed.
    :param option: manifest options; ``option['metadata']`` is passed on as
        the user metadata URL when present.
    :return: a function taking the TestCase instance as its only argument.
    """
    def test(self):
        metadata_url = option.get('metadata')

        try:
            csvw = CSVW(url=csv_url, metadata_url=metadata_url)
        except Exception:
            # An exception is the expected outcome of a negative test only.
            if TYPES[type]:
                # Bare raise keeps the original traceback; "raise e" would
                # replace it on Python 2.
                raise
            self.assertFalse(TYPES[type])
            return

        # if we get here this should be a positive test
        self.assertTrue(TYPES[type])

        # if we can parse it we should at least produce a table and some embedded metadata
        self.assertNotEqual(csvw.table, None)
        self.assertNotEqual(csvw.metadata, None)

        # The result is not compared with anything yet; the call only makes
        # sure the merged metadata can be serialized without raising.
        csvw.metadata.json()

    return test
def implementation_report(graph, subject, assertor):
    """Run every validation test and record the outcome as EARL assertions.

    For each manifest entry one ``earl:Assertion`` with an attached
    ``earl:TestResult`` (outcome and timestamp) is added to ``graph``.

    :param graph: rdflib graph the assertions are added to (modified in
        place).
    :param subject: node identifying the tested software.
    :param assertor: node identifying who asserts the results.
    """
    from rdflib.namespace import XSD, DC
    EARL = rdflib.Namespace("http://www.w3.org/ns/earl#")

    validation_html = "http://www.w3.org/2013/csvw/tests/"
    # Name under which each generated test is temporarily attached to the
    # TestCase class so that unittest can run it.
    test_name = 'tmp'

    manifest = get_manifest()
    for i, t in enumerate(manifest['entries']):
        # add the properties for a test case
        assertion = rdflib.BNode()
        graph.add((assertion, rdflib.RDF.type, EARL.Assertion))
        graph.add((assertion, EARL.assertedBy, assertor))
        graph.add((assertion, EARL.subject, subject))
        graph.add((assertion, EARL.test, rdflib.URIRef(validation_html + t['id'])))
        result = rdflib.BNode()
        graph.add((assertion, EARL.result, result))
        graph.add((result, rdflib.RDF.type, EARL.TestResult))
        graph.add((result, EARL.mode, EARL.automatic))

        test = get_test_method(i, t)
        if test is None:
            # get_test_method only knows .csv and .json actions; anything
            # else cannot be executed, so do not pretend it was.
            outcome = EARL.untested
        else:
            # TODO edit this hack...
            setattr(CSVWValidationTestCases, test_name, test)
            try:
                suite = unittest.TestSuite()
                suite.addTest(CSVWValidationTestCases(test_name))
                test_result = unittest.TextTestRunner().run(suite)
            finally:
                # Always detach the temporary method, even if the run blows up.
                delattr(CSVWValidationTestCases, test_name)

            # wasSuccessful() is False for failures *and* errors; looking at
            # failures alone reported tests that raised as passed.
            if test_result.wasSuccessful():
                outcome = EARL.passed
            else:
                outcome = EARL.failed
        graph.add((result, EARL.outcome, outcome))

        # add timestamp; isoformat() yields date and time, hence xsd:dateTime
        now = datetime.datetime.now().isoformat()
        graph.add((result, DC.date, rdflib.Literal(now, datatype=XSD.dateTime)))
22 | 23 | _:sebneu a foaf:Person, earl:Assertor; 24 | foaf:name "Sebastian Neumaier"; 25 | foaf:homepage ; 26 | foaf:title "Implementor". -------------------------------------------------------------------------------- /test/errors_10-12-15.txt: -------------------------------------------------------------------------------- 1 | ====================================================================== 2 | ERROR: test manifest-validation#test046 csvt:WarningValidationTest invalid dataype (__main__.CSVWValidationTestCases) 3 | ---------------------------------------------------------------------- 4 | Traceback (most recent call last): 5 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 64, in test 6 | csvw = CSVW(url=csv_url, metadata_url=metadata_url) 7 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/main.py", line 50, in __init__ 8 | self.metadata = metadata.merge(sources) 9 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/metadata.py", line 1064, in merge 10 | raise ValidationException('validation failed for metadata: ' + str(s)) 11 | ValidationException: validation failed for metadata: {u'datatype': u'anySimpleType', u'@context': u'http://www.w3.org/ns/csvw', u'tables': [{u'url': u'test046.csv', u'tableSchema': {u'columns': [{u'titles': u'null'}, {u'titles': u'lang'}, {u'titles': u'textDirection'}, {u'titles': u'separator'}, {u'titles': u'ordered'}, {u'titles': u'default'}, {u'titles': u'datatype'}, {u'titles': u'aboutUrl'}, {u'titles': u'propertyUrl'}, {u'titles': u'valueUrl'}]}}]} 12 | 13 | ====================================================================== 14 | ERROR: test manifest-validation#test096 csvt:WarningValidationTest inconsistent array values: columns (__main__.CSVWValidationTestCases) 15 | ---------------------------------------------------------------------- 16 | Traceback (most recent call last): 17 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", 
line 64, in test 18 | csvw = CSVW(url=csv_url, metadata_url=metadata_url) 19 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/main.py", line 50, in __init__ 20 | self.metadata = metadata.merge(sources) 21 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/metadata.py", line 1064, in merge 22 | raise ValidationException('validation failed for metadata: ' + str(s)) 23 | ValidationException: validation failed for metadata: {u'url': u'tree-ops.csv', u'rdfs:comment': u'last column is datatype, not column', u'tableSchema': {u'columns': [{u'titles': u'GID', u'name': u'GID'}, {u'titles': u'On Street', u'name': u'on_street'}, {u'titles': u'Species', u'name': u'species'}, {u'titles': u'Trim Cycle', u'name': u'trim_cycle'}, {u'titles': u'Inventory Date', u'name': u'inventory_date'}, 1]}} 24 | 25 | ====================================================================== 26 | ERROR: test manifest-validation#test150 csvt:WarningValidationTest non-builtin datatype (datatype value) (__main__.CSVWValidationTestCases) 27 | ---------------------------------------------------------------------- 28 | Traceback (most recent call last): 29 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 64, in test 30 | csvw = CSVW(url=csv_url, metadata_url=metadata_url) 31 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/main.py", line 50, in __init__ 32 | self.metadata = metadata.merge(sources) 33 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/metadata.py", line 1064, in merge 34 | raise ValidationException('validation failed for metadata: ' + str(s)) 35 | ValidationException: validation failed for metadata: {u'url': u'tree-ops.csv', u'rdfs:comment': u'If the value of this property is a string, it MUST be one of the built-in datatypes', u'tableSchema': {u'columns': [{u'datatype': u'foo', u'titles': u'GID'}, {u'titles': u'On Street'}, {u'titles': u'Species'}, {u'titles': u'Trim Cycle'}, {u'titles': u'Inventory Date'}]}, 
u'rdfs:label': u'non-builtin datatype (datatype value)'} 36 | 37 | ====================================================================== 38 | ERROR: test manifest-validation#test238 csvt:WarningValidationTest datatype value an absolute URL (__main__.CSVWValidationTestCases) 39 | ---------------------------------------------------------------------- 40 | Traceback (most recent call last): 41 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 64, in test 42 | csvw = CSVW(url=csv_url, metadata_url=metadata_url) 43 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/main.py", line 50, in __init__ 44 | self.metadata = metadata.merge(sources) 45 | File "/home/sebastian/Repositories/csvw-parser/csvwparser/metadata.py", line 1064, in merge 46 | raise ValidationException('validation failed for metadata: ' + str(s)) 47 | ValidationException: validation failed for metadata: {u'url': u'test238.csv', u'rdfs:comment': u'If the value of this property is a string, it MUST be one of the built-in datatypes defined in section 5.11.1 Built-in Datatypes or an absolute URL.', u'tableSchema': {u'columns': [{u'datatype': u'http://example.org/datatype', u'titles': u'string'}]}, u'rdfs:label': u'datatype value an absolute URL that does not resolve'} 48 | 49 | ====================================================================== 50 | FAIL: test manifest-validation#test081 csvt:NegativeValidationTest invalid dialect @id (__main__.CSVWValidationTestCases) 51 | ---------------------------------------------------------------------- 52 | Traceback (most recent call last): 53 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 54 | self.assertTrue(TYPES[type]) 55 | AssertionError: False is not true 56 | 57 | ====================================================================== 58 | FAIL: test manifest-validation#test082 csvt:NegativeValidationTest invalid template @id 
(__main__.CSVWValidationTestCases) 59 | ---------------------------------------------------------------------- 60 | Traceback (most recent call last): 61 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 62 | self.assertTrue(TYPES[type]) 63 | AssertionError: False is not true 64 | 65 | ====================================================================== 66 | FAIL: test manifest-validation#test087 csvt:NegativeValidationTest invalid dialect @type (__main__.CSVWValidationTestCases) 67 | ---------------------------------------------------------------------- 68 | Traceback (most recent call last): 69 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 70 | self.assertTrue(TYPES[type]) 71 | AssertionError: False is not true 72 | 73 | ====================================================================== 74 | FAIL: test manifest-validation#test088 csvt:NegativeValidationTest invalid transformation @type (__main__.CSVWValidationTestCases) 75 | ---------------------------------------------------------------------- 76 | Traceback (most recent call last): 77 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 78 | self.assertTrue(TYPES[type]) 79 | AssertionError: False is not true 80 | 81 | ====================================================================== 82 | FAIL: test manifest-validation#test103 csvt:NegativeValidationTest inconsistent link values: url (__main__.CSVWValidationTestCases) 83 | ---------------------------------------------------------------------- 84 | Traceback (most recent call last): 85 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 86 | self.assertTrue(TYPES[type]) 87 | AssertionError: False is not true 88 | 89 | ====================================================================== 90 | FAIL: test manifest-validation#test104 
csvt:NegativeValidationTest invalid columnReference (__main__.CSVWValidationTestCases) 91 | ---------------------------------------------------------------------- 92 | Traceback (most recent call last): 93 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 94 | self.assertTrue(TYPES[type]) 95 | AssertionError: False is not true 96 | 97 | ====================================================================== 98 | FAIL: test manifest-validation#test108 csvt:NegativeValidationTest invalid reference (__main__.CSVWValidationTestCases) 99 | ---------------------------------------------------------------------- 100 | Traceback (most recent call last): 101 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 102 | self.assertTrue(TYPES[type]) 103 | AssertionError: False is not true 104 | 105 | ====================================================================== 106 | FAIL: test manifest-validation#test124 csvt:NegativeValidationTest metadata with columns not matching csv titles (__main__.CSVWValidationTestCases) 107 | ---------------------------------------------------------------------- 108 | Traceback (most recent call last): 109 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 42, in test 110 | self.assertTrue(TYPES[type]) 111 | AssertionError: False is not true 112 | 113 | ====================================================================== 114 | FAIL: test manifest-validation#test125 csvt:NegativeValidationTest required column with empty cell (__main__.CSVWValidationTestCases) 115 | ---------------------------------------------------------------------- 116 | Traceback (most recent call last): 117 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 118 | self.assertTrue(TYPES[type]) 119 | AssertionError: False is not true 120 | 121 | 
====================================================================== 122 | FAIL: test manifest-validation#test126 csvt:NegativeValidationTest required column with cell matching null (__main__.CSVWValidationTestCases) 123 | ---------------------------------------------------------------------- 124 | Traceback (most recent call last): 125 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 126 | self.assertTrue(TYPES[type]) 127 | AssertionError: False is not true 128 | 129 | ====================================================================== 130 | FAIL: test manifest-validation#test127 csvt:NegativeValidationTest incompatible table (__main__.CSVWValidationTestCases) 131 | ---------------------------------------------------------------------- 132 | Traceback (most recent call last): 133 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 134 | self.assertTrue(TYPES[type]) 135 | AssertionError: False is not true 136 | 137 | ====================================================================== 138 | FAIL: test manifest-validation#test133 csvt:NegativeValidationTest virtual before non-virtual (__main__.CSVWValidationTestCases) 139 | ---------------------------------------------------------------------- 140 | Traceback (most recent call last): 141 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 142 | self.assertTrue(TYPES[type]) 143 | AssertionError: False is not true 144 | 145 | ====================================================================== 146 | FAIL: test manifest-validation#test134 csvt:NegativeValidationTest context in common property (__main__.CSVWValidationTestCases) 147 | ---------------------------------------------------------------------- 148 | Traceback (most recent call last): 149 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 150 | 
self.assertTrue(TYPES[type]) 151 | AssertionError: False is not true 152 | 153 | ====================================================================== 154 | FAIL: test manifest-validation#test135 csvt:NegativeValidationTest @list value (__main__.CSVWValidationTestCases) 155 | ---------------------------------------------------------------------- 156 | Traceback (most recent call last): 157 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 158 | self.assertTrue(TYPES[type]) 159 | AssertionError: False is not true 160 | 161 | ====================================================================== 162 | FAIL: test manifest-validation#test136 csvt:NegativeValidationTest @set value (__main__.CSVWValidationTestCases) 163 | ---------------------------------------------------------------------- 164 | Traceback (most recent call last): 165 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 166 | self.assertTrue(TYPES[type]) 167 | AssertionError: False is not true 168 | 169 | ====================================================================== 170 | FAIL: test manifest-validation#test137 csvt:NegativeValidationTest @type out of range (as datatype) (__main__.CSVWValidationTestCases) 171 | ---------------------------------------------------------------------- 172 | Traceback (most recent call last): 173 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 174 | self.assertTrue(TYPES[type]) 175 | AssertionError: False is not true 176 | 177 | ====================================================================== 178 | FAIL: test manifest-validation#test138 csvt:NegativeValidationTest @type out of range (as node type) (__main__.CSVWValidationTestCases) 179 | ---------------------------------------------------------------------- 180 | Traceback (most recent call last): 181 | File 
"/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 182 | self.assertTrue(TYPES[type]) 183 | AssertionError: False is not true 184 | 185 | ====================================================================== 186 | FAIL: test manifest-validation#test139 csvt:NegativeValidationTest @type out of range (as node type) - string (__main__.CSVWValidationTestCases) 187 | ---------------------------------------------------------------------- 188 | Traceback (most recent call last): 189 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 190 | self.assertTrue(TYPES[type]) 191 | AssertionError: False is not true 192 | 193 | ====================================================================== 194 | FAIL: test manifest-validation#test140 csvt:NegativeValidationTest @type out of range (as node type) - integer (__main__.CSVWValidationTestCases) 195 | ---------------------------------------------------------------------- 196 | Traceback (most recent call last): 197 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 198 | self.assertTrue(TYPES[type]) 199 | AssertionError: False is not true 200 | 201 | ====================================================================== 202 | FAIL: test manifest-validation#test141 csvt:NegativeValidationTest @id out of range (as node type) - bnode (__main__.CSVWValidationTestCases) 203 | ---------------------------------------------------------------------- 204 | Traceback (most recent call last): 205 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 206 | self.assertTrue(TYPES[type]) 207 | AssertionError: False is not true 208 | 209 | ====================================================================== 210 | FAIL: test manifest-validation#test142 csvt:NegativeValidationTest @value with @language and @type (__main__.CSVWValidationTestCases) 211 | 
---------------------------------------------------------------------- 212 | Traceback (most recent call last): 213 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 214 | self.assertTrue(TYPES[type]) 215 | AssertionError: False is not true 216 | 217 | ====================================================================== 218 | FAIL: test manifest-validation#test143 csvt:NegativeValidationTest @value with extra properties (__main__.CSVWValidationTestCases) 219 | ---------------------------------------------------------------------- 220 | Traceback (most recent call last): 221 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 222 | self.assertTrue(TYPES[type]) 223 | AssertionError: False is not true 224 | 225 | ====================================================================== 226 | FAIL: test manifest-validation#test144 csvt:NegativeValidationTest @language outside of @value (__main__.CSVWValidationTestCases) 227 | ---------------------------------------------------------------------- 228 | Traceback (most recent call last): 229 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 230 | self.assertTrue(TYPES[type]) 231 | AssertionError: False is not true 232 | 233 | ====================================================================== 234 | FAIL: test manifest-validation#test145 csvt:NegativeValidationTest @value with invalid @language (__main__.CSVWValidationTestCases) 235 | ---------------------------------------------------------------------- 236 | Traceback (most recent call last): 237 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 238 | self.assertTrue(TYPES[type]) 239 | AssertionError: False is not true 240 | 241 | ====================================================================== 242 | FAIL: test manifest-validation#test146 
csvt:NegativeValidationTest Invalid faux-keyword (__main__.CSVWValidationTestCases) 243 | ---------------------------------------------------------------------- 244 | Traceback (most recent call last): 245 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 246 | self.assertTrue(TYPES[type]) 247 | AssertionError: False is not true 248 | 249 | ====================================================================== 250 | FAIL: test manifest-validation#test147 csvt:NegativeValidationTest title incompatible with title on case (__main__.CSVWValidationTestCases) 251 | ---------------------------------------------------------------------- 252 | Traceback (most recent call last): 253 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 254 | self.assertTrue(TYPES[type]) 255 | AssertionError: False is not true 256 | 257 | ====================================================================== 258 | FAIL: test manifest-validation#test148 csvt:NegativeValidationTest title incompatible with title on language (__main__.CSVWValidationTestCases) 259 | ---------------------------------------------------------------------- 260 | Traceback (most recent call last): 261 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 262 | self.assertTrue(TYPES[type]) 263 | AssertionError: False is not true 264 | 265 | ====================================================================== 266 | FAIL: test manifest-validation#test154 csvt:NegativeValidationTest string format (value not matching format) (__main__.CSVWValidationTestCases) 267 | ---------------------------------------------------------------------- 268 | Traceback (most recent call last): 269 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 270 | self.assertTrue(TYPES[type]) 271 | AssertionError: False is not true 272 | 273 | 
====================================================================== 274 | FAIL: test manifest-validation#test157 csvt:NegativeValidationTest number format (value not matching format) (__main__.CSVWValidationTestCases) 275 | ---------------------------------------------------------------------- 276 | Traceback (most recent call last): 277 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 278 | self.assertTrue(TYPES[type]) 279 | AssertionError: False is not true 280 | 281 | ====================================================================== 282 | FAIL: test manifest-validation#test160 csvt:NegativeValidationTest number format (not matching values with pattern) (__main__.CSVWValidationTestCases) 283 | ---------------------------------------------------------------------- 284 | Traceback (most recent call last): 285 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 286 | self.assertTrue(TYPES[type]) 287 | AssertionError: False is not true 288 | 289 | ====================================================================== 290 | FAIL: test manifest-validation#test161 csvt:NegativeValidationTest number format (not matching values without pattern) (__main__.CSVWValidationTestCases) 291 | ---------------------------------------------------------------------- 292 | Traceback (most recent call last): 293 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 294 | self.assertTrue(TYPES[type]) 295 | AssertionError: False is not true 296 | 297 | ====================================================================== 298 | FAIL: test manifest-validation#test162 csvt:NegativeValidationTest numeric format (consecutive groupChar) (__main__.CSVWValidationTestCases) 299 | ---------------------------------------------------------------------- 300 | Traceback (most recent call last): 301 | File 
"/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 302 | self.assertTrue(TYPES[type]) 303 | AssertionError: False is not true 304 | 305 | ====================================================================== 306 | FAIL: test manifest-validation#test163 csvt:NegativeValidationTest integer datatype with decimalChar (__main__.CSVWValidationTestCases) 307 | ---------------------------------------------------------------------- 308 | Traceback (most recent call last): 309 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 310 | self.assertTrue(TYPES[type]) 311 | AssertionError: False is not true 312 | 313 | ====================================================================== 314 | FAIL: test manifest-validation#test164 csvt:NegativeValidationTest decimal datatype with exponent (__main__.CSVWValidationTestCases) 315 | ---------------------------------------------------------------------- 316 | Traceback (most recent call last): 317 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 318 | self.assertTrue(TYPES[type]) 319 | AssertionError: False is not true 320 | 321 | ====================================================================== 322 | FAIL: test manifest-validation#test165 csvt:NegativeValidationTest decimal type with NaN (__main__.CSVWValidationTestCases) 323 | ---------------------------------------------------------------------- 324 | Traceback (most recent call last): 325 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 326 | self.assertTrue(TYPES[type]) 327 | AssertionError: False is not true 328 | 329 | ====================================================================== 330 | FAIL: test manifest-validation#test166 csvt:NegativeValidationTest decimal type with INF (__main__.CSVWValidationTestCases) 331 | 
---------------------------------------------------------------------- 332 | Traceback (most recent call last): 333 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 334 | self.assertTrue(TYPES[type]) 335 | AssertionError: False is not true 336 | 337 | ====================================================================== 338 | FAIL: test manifest-validation#test167 csvt:NegativeValidationTest decimal type with -INF (__main__.CSVWValidationTestCases) 339 | ---------------------------------------------------------------------- 340 | Traceback (most recent call last): 341 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 342 | self.assertTrue(TYPES[type]) 343 | AssertionError: False is not true 344 | 345 | ====================================================================== 346 | FAIL: test manifest-validation#test169 csvt:NegativeValidationTest invalid decimal (__main__.CSVWValidationTestCases) 347 | ---------------------------------------------------------------------- 348 | Traceback (most recent call last): 349 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 350 | self.assertTrue(TYPES[type]) 351 | AssertionError: False is not true 352 | 353 | ====================================================================== 354 | FAIL: test manifest-validation#test172 csvt:NegativeValidationTest invalid byte (__main__.CSVWValidationTestCases) 355 | ---------------------------------------------------------------------- 356 | Traceback (most recent call last): 357 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 358 | self.assertTrue(TYPES[type]) 359 | AssertionError: False is not true 360 | 361 | ====================================================================== 362 | FAIL: test manifest-validation#test173 csvt:NegativeValidationTest invald unsignedLong 
(__main__.CSVWValidationTestCases) 363 | ---------------------------------------------------------------------- 364 | Traceback (most recent call last): 365 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 366 | self.assertTrue(TYPES[type]) 367 | AssertionError: False is not true 368 | 369 | ====================================================================== 370 | FAIL: test manifest-validation#test174 csvt:NegativeValidationTest invalid unsignedShort (__main__.CSVWValidationTestCases) 371 | ---------------------------------------------------------------------- 372 | Traceback (most recent call last): 373 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 374 | self.assertTrue(TYPES[type]) 375 | AssertionError: False is not true 376 | 377 | ====================================================================== 378 | FAIL: test manifest-validation#test175 csvt:NegativeValidationTest invalid unsignedByte (__main__.CSVWValidationTestCases) 379 | ---------------------------------------------------------------------- 380 | Traceback (most recent call last): 381 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 382 | self.assertTrue(TYPES[type]) 383 | AssertionError: False is not true 384 | 385 | ====================================================================== 386 | FAIL: test manifest-validation#test176 csvt:NegativeValidationTest invalid positiveInteger (__main__.CSVWValidationTestCases) 387 | ---------------------------------------------------------------------- 388 | Traceback (most recent call last): 389 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 390 | self.assertTrue(TYPES[type]) 391 | AssertionError: False is not true 392 | 393 | ====================================================================== 394 | FAIL: test 
manifest-validation#test177 csvt:NegativeValidationTest invalid negativeInteger (__main__.CSVWValidationTestCases) 395 | ---------------------------------------------------------------------- 396 | Traceback (most recent call last): 397 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 398 | self.assertTrue(TYPES[type]) 399 | AssertionError: False is not true 400 | 401 | ====================================================================== 402 | FAIL: test manifest-validation#test178 csvt:NegativeValidationTest invalid nonPositiveInteger (__main__.CSVWValidationTestCases) 403 | ---------------------------------------------------------------------- 404 | Traceback (most recent call last): 405 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 406 | self.assertTrue(TYPES[type]) 407 | AssertionError: False is not true 408 | 409 | ====================================================================== 410 | FAIL: test manifest-validation#test179 csvt:NegativeValidationTest invalid nonNegativeInteger (__main__.CSVWValidationTestCases) 411 | ---------------------------------------------------------------------- 412 | Traceback (most recent call last): 413 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 414 | self.assertTrue(TYPES[type]) 415 | AssertionError: False is not true 416 | 417 | ====================================================================== 418 | FAIL: test manifest-validation#test180 csvt:NegativeValidationTest invalid double (__main__.CSVWValidationTestCases) 419 | ---------------------------------------------------------------------- 420 | Traceback (most recent call last): 421 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 422 | self.assertTrue(TYPES[type]) 423 | AssertionError: False is not true 424 | 425 | 
====================================================================== 426 | FAIL: test manifest-validation#test181 csvt:NegativeValidationTest invalid number (__main__.CSVWValidationTestCases) 427 | ---------------------------------------------------------------------- 428 | Traceback (most recent call last): 429 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 430 | self.assertTrue(TYPES[type]) 431 | AssertionError: False is not true 432 | 433 | ====================================================================== 434 | FAIL: test manifest-validation#test182 csvt:NegativeValidationTest invalid float (__main__.CSVWValidationTestCases) 435 | ---------------------------------------------------------------------- 436 | Traceback (most recent call last): 437 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 438 | self.assertTrue(TYPES[type]) 439 | AssertionError: False is not true 440 | 441 | ====================================================================== 442 | FAIL: test manifest-validation#test185 csvt:NegativeValidationTest boolean format (value not matching format) (__main__.CSVWValidationTestCases) 443 | ---------------------------------------------------------------------- 444 | Traceback (most recent call last): 445 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 446 | self.assertTrue(TYPES[type]) 447 | AssertionError: False is not true 448 | 449 | ====================================================================== 450 | FAIL: test manifest-validation#test186 csvt:NegativeValidationTest boolean format (not matching datatype) (__main__.CSVWValidationTestCases) 451 | ---------------------------------------------------------------------- 452 | Traceback (most recent call last): 453 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 454 | 
self.assertTrue(TYPES[type]) 455 | AssertionError: False is not true 456 | 457 | ====================================================================== 458 | FAIL: test manifest-validation#test191 csvt:NegativeValidationTest date format (bad format string) (__main__.CSVWValidationTestCases) 459 | ---------------------------------------------------------------------- 460 | Traceback (most recent call last): 461 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 462 | self.assertTrue(TYPES[type]) 463 | AssertionError: False is not true 464 | 465 | ====================================================================== 466 | FAIL: test manifest-validation#test192 csvt:NegativeValidationTest date format (value not matching format) (__main__.CSVWValidationTestCases) 467 | ---------------------------------------------------------------------- 468 | Traceback (most recent call last): 469 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 470 | self.assertTrue(TYPES[type]) 471 | AssertionError: False is not true 472 | 473 | ====================================================================== 474 | FAIL: test manifest-validation#test194 csvt:NegativeValidationTest duration format (value not matching format) (__main__.CSVWValidationTestCases) 475 | ---------------------------------------------------------------------- 476 | Traceback (most recent call last): 477 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 478 | self.assertTrue(TYPES[type]) 479 | AssertionError: False is not true 480 | 481 | ====================================================================== 482 | FAIL: test manifest-validation#test196 csvt:NegativeValidationTest values with wrong length (__main__.CSVWValidationTestCases) 483 | ---------------------------------------------------------------------- 484 | Traceback (most recent call last): 485 | 
File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 486 | self.assertTrue(TYPES[type]) 487 | AssertionError: False is not true 488 | 489 | ====================================================================== 490 | FAIL: test manifest-validation#test197 csvt:NegativeValidationTest values with wrong maxLength (__main__.CSVWValidationTestCases) 491 | ---------------------------------------------------------------------- 492 | Traceback (most recent call last): 493 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 494 | self.assertTrue(TYPES[type]) 495 | AssertionError: False is not true 496 | 497 | ====================================================================== 498 | FAIL: test manifest-validation#test198 csvt:NegativeValidationTest values with wrong minLength (__main__.CSVWValidationTestCases) 499 | ---------------------------------------------------------------------- 500 | Traceback (most recent call last): 501 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 502 | self.assertTrue(TYPES[type]) 503 | AssertionError: False is not true 504 | 505 | ====================================================================== 506 | FAIL: test manifest-validation#test199 csvt:NegativeValidationTest length < minLength (__main__.CSVWValidationTestCases) 507 | ---------------------------------------------------------------------- 508 | Traceback (most recent call last): 509 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 510 | self.assertTrue(TYPES[type]) 511 | AssertionError: False is not true 512 | 513 | ====================================================================== 514 | FAIL: test manifest-validation#test200 csvt:NegativeValidationTest length > maxLength (__main__.CSVWValidationTestCases) 515 | 
---------------------------------------------------------------------- 516 | Traceback (most recent call last): 517 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 518 | self.assertTrue(TYPES[type]) 519 | AssertionError: False is not true 520 | 521 | ====================================================================== 522 | FAIL: test manifest-validation#test201 csvt:NegativeValidationTest length on date (__main__.CSVWValidationTestCases) 523 | ---------------------------------------------------------------------- 524 | Traceback (most recent call last): 525 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 526 | self.assertTrue(TYPES[type]) 527 | AssertionError: False is not true 528 | 529 | ====================================================================== 530 | FAIL: test manifest-validation#test203 csvt:NegativeValidationTest float value constraint not matching minimum (__main__.CSVWValidationTestCases) 531 | ---------------------------------------------------------------------- 532 | Traceback (most recent call last): 533 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 534 | self.assertTrue(TYPES[type]) 535 | AssertionError: False is not true 536 | 537 | ====================================================================== 538 | FAIL: test manifest-validation#test204 csvt:NegativeValidationTest float value constraint not matching maximum (__main__.CSVWValidationTestCases) 539 | ---------------------------------------------------------------------- 540 | Traceback (most recent call last): 541 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 542 | self.assertTrue(TYPES[type]) 543 | AssertionError: False is not true 544 | 545 | ====================================================================== 546 | FAIL: test manifest-validation#test205 
csvt:NegativeValidationTest float value constraint not matching minInclusive (__main__.CSVWValidationTestCases) 547 | ---------------------------------------------------------------------- 548 | Traceback (most recent call last): 549 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 550 | self.assertTrue(TYPES[type]) 551 | AssertionError: False is not true 552 | 553 | ====================================================================== 554 | FAIL: test manifest-validation#test206 csvt:NegativeValidationTest float value constraint not matching minExclusive (__main__.CSVWValidationTestCases) 555 | ---------------------------------------------------------------------- 556 | Traceback (most recent call last): 557 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 558 | self.assertTrue(TYPES[type]) 559 | AssertionError: False is not true 560 | 561 | ====================================================================== 562 | FAIL: test manifest-validation#test207 csvt:NegativeValidationTest float value constraint not matching maxInclusive (__main__.CSVWValidationTestCases) 563 | ---------------------------------------------------------------------- 564 | Traceback (most recent call last): 565 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 566 | self.assertTrue(TYPES[type]) 567 | AssertionError: False is not true 568 | 569 | ====================================================================== 570 | FAIL: test manifest-validation#test208 csvt:NegativeValidationTest float value constraint not matching maxExclusive (__main__.CSVWValidationTestCases) 571 | ---------------------------------------------------------------------- 572 | Traceback (most recent call last): 573 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 574 | self.assertTrue(TYPES[type]) 575 | 
AssertionError: False is not true 576 | 577 | ====================================================================== 578 | FAIL: test manifest-validation#test210 csvt:NegativeValidationTest date value constraint not matching minimum (__main__.CSVWValidationTestCases) 579 | ---------------------------------------------------------------------- 580 | Traceback (most recent call last): 581 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 582 | self.assertTrue(TYPES[type]) 583 | AssertionError: False is not true 584 | 585 | ====================================================================== 586 | FAIL: test manifest-validation#test211 csvt:NegativeValidationTest date value constraint not matching maximum (__main__.CSVWValidationTestCases) 587 | ---------------------------------------------------------------------- 588 | Traceback (most recent call last): 589 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 590 | self.assertTrue(TYPES[type]) 591 | AssertionError: False is not true 592 | 593 | ====================================================================== 594 | FAIL: test manifest-validation#test212 csvt:NegativeValidationTest date value constraint not matching minInclusive (__main__.CSVWValidationTestCases) 595 | ---------------------------------------------------------------------- 596 | Traceback (most recent call last): 597 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 598 | self.assertTrue(TYPES[type]) 599 | AssertionError: False is not true 600 | 601 | ====================================================================== 602 | FAIL: test manifest-validation#test213 csvt:NegativeValidationTest date value constraint not matching minExclusive (__main__.CSVWValidationTestCases) 603 | ---------------------------------------------------------------------- 604 | Traceback (most recent call last): 
605 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 606 | self.assertTrue(TYPES[type]) 607 | AssertionError: False is not true 608 | 609 | ====================================================================== 610 | FAIL: test manifest-validation#test214 csvt:NegativeValidationTest date value constraint not matching maxInclusive (__main__.CSVWValidationTestCases) 611 | ---------------------------------------------------------------------- 612 | Traceback (most recent call last): 613 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 614 | self.assertTrue(TYPES[type]) 615 | AssertionError: False is not true 616 | 617 | ====================================================================== 618 | FAIL: test manifest-validation#test215 csvt:NegativeValidationTest date value constraint not matching maxExclusive (__main__.CSVWValidationTestCases) 619 | ---------------------------------------------------------------------- 620 | Traceback (most recent call last): 621 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 622 | self.assertTrue(TYPES[type]) 623 | AssertionError: False is not true 624 | 625 | ====================================================================== 626 | FAIL: test manifest-validation#test216 csvt:NegativeValidationTest minInclusive and minExclusive (__main__.CSVWValidationTestCases) 627 | ---------------------------------------------------------------------- 628 | Traceback (most recent call last): 629 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 630 | self.assertTrue(TYPES[type]) 631 | AssertionError: False is not true 632 | 633 | ====================================================================== 634 | FAIL: test manifest-validation#test217 csvt:NegativeValidationTest maxInclusive and maxExclusive 
(__main__.CSVWValidationTestCases) 635 | ---------------------------------------------------------------------- 636 | Traceback (most recent call last): 637 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 638 | self.assertTrue(TYPES[type]) 639 | AssertionError: False is not true 640 | 641 | ====================================================================== 642 | FAIL: test manifest-validation#test218 csvt:NegativeValidationTest maxInclusive < minInclusive (__main__.CSVWValidationTestCases) 643 | ---------------------------------------------------------------------- 644 | Traceback (most recent call last): 645 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 646 | self.assertTrue(TYPES[type]) 647 | AssertionError: False is not true 648 | 649 | ====================================================================== 650 | FAIL: test manifest-validation#test219 csvt:NegativeValidationTest maxExclusive = minInclusive (__main__.CSVWValidationTestCases) 651 | ---------------------------------------------------------------------- 652 | Traceback (most recent call last): 653 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 654 | self.assertTrue(TYPES[type]) 655 | AssertionError: False is not true 656 | 657 | ====================================================================== 658 | FAIL: test manifest-validation#test220 csvt:NegativeValidationTest maxExclusive < minExclusive (__main__.CSVWValidationTestCases) 659 | ---------------------------------------------------------------------- 660 | Traceback (most recent call last): 661 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 662 | self.assertTrue(TYPES[type]) 663 | AssertionError: False is not true 664 | 665 | ====================================================================== 666 | FAIL: test 
manifest-validation#test221 csvt:NegativeValidationTest maxInclusive = minExclusive (__main__.CSVWValidationTestCases) 667 | ---------------------------------------------------------------------- 668 | Traceback (most recent call last): 669 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 670 | self.assertTrue(TYPES[type]) 671 | AssertionError: False is not true 672 | 673 | ====================================================================== 674 | FAIL: test manifest-validation#test222 csvt:NegativeValidationTest string datatype with minimum (__main__.CSVWValidationTestCases) 675 | ---------------------------------------------------------------------- 676 | Traceback (most recent call last): 677 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 678 | self.assertTrue(TYPES[type]) 679 | AssertionError: False is not true 680 | 681 | ====================================================================== 682 | FAIL: test manifest-validation#test223 csvt:NegativeValidationTest string datatype with maxium (__main__.CSVWValidationTestCases) 683 | ---------------------------------------------------------------------- 684 | Traceback (most recent call last): 685 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 686 | self.assertTrue(TYPES[type]) 687 | AssertionError: False is not true 688 | 689 | ====================================================================== 690 | FAIL: test manifest-validation#test224 csvt:NegativeValidationTest string datatype with minInclusive (__main__.CSVWValidationTestCases) 691 | ---------------------------------------------------------------------- 692 | Traceback (most recent call last): 693 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 694 | self.assertTrue(TYPES[type]) 695 | AssertionError: False is not true 696 | 697 | 
====================================================================== 698 | FAIL: test manifest-validation#test225 csvt:NegativeValidationTest string datatype with maxInclusive (__main__.CSVWValidationTestCases) 699 | ---------------------------------------------------------------------- 700 | Traceback (most recent call last): 701 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 702 | self.assertTrue(TYPES[type]) 703 | AssertionError: False is not true 704 | 705 | ====================================================================== 706 | FAIL: test manifest-validation#test226 csvt:NegativeValidationTest string datatype with minExclusive (__main__.CSVWValidationTestCases) 707 | ---------------------------------------------------------------------- 708 | Traceback (most recent call last): 709 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 710 | self.assertTrue(TYPES[type]) 711 | AssertionError: False is not true 712 | 713 | ====================================================================== 714 | FAIL: test manifest-validation#test227 csvt:NegativeValidationTest string datatype with maxExclusive (__main__.CSVWValidationTestCases) 715 | ---------------------------------------------------------------------- 716 | Traceback (most recent call last): 717 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 718 | self.assertTrue(TYPES[type]) 719 | AssertionError: False is not true 720 | 721 | ====================================================================== 722 | FAIL: test manifest-validation#test230 csvt:NegativeValidationTest failing minLength with separator (__main__.CSVWValidationTestCases) 723 | ---------------------------------------------------------------------- 724 | Traceback (most recent call last): 725 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 
71, in test 726 | self.assertTrue(TYPES[type]) 727 | AssertionError: False is not true 728 | 729 | ====================================================================== 730 | FAIL: test manifest-validation#test232 csvt:NegativeValidationTest single column primaryKey violation (__main__.CSVWValidationTestCases) 731 | ---------------------------------------------------------------------- 732 | Traceback (most recent call last): 733 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 734 | self.assertTrue(TYPES[type]) 735 | AssertionError: False is not true 736 | 737 | ====================================================================== 738 | FAIL: test manifest-validation#test234 csvt:NegativeValidationTest multiple column primaryKey violation (__main__.CSVWValidationTestCases) 739 | ---------------------------------------------------------------------- 740 | Traceback (most recent call last): 741 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 742 | self.assertTrue(TYPES[type]) 743 | AssertionError: False is not true 744 | 745 | ====================================================================== 746 | FAIL: test manifest-validation#test243 csvt:NegativeValidationTest invalid datatype @id (__main__.CSVWValidationTestCases) 747 | ---------------------------------------------------------------------- 748 | Traceback (most recent call last): 749 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 750 | self.assertTrue(TYPES[type]) 751 | AssertionError: False is not true 752 | 753 | ====================================================================== 754 | FAIL: test manifest-validation#test244 csvt:NegativeValidationTest invalid datatype @id (__main__.CSVWValidationTestCases) 755 | ---------------------------------------------------------------------- 756 | Traceback (most recent call last): 757 | File 
"/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 758 | self.assertTrue(TYPES[type]) 759 | AssertionError: False is not true 760 | 761 | ====================================================================== 762 | FAIL: test manifest-validation#test247 csvt:NegativeValidationTest date format (extra milliseconds) (__main__.CSVWValidationTestCases) 763 | ---------------------------------------------------------------------- 764 | Traceback (most recent call last): 765 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 766 | self.assertTrue(TYPES[type]) 767 | AssertionError: False is not true 768 | 769 | ====================================================================== 770 | FAIL: test manifest-validation#test251 csvt:NegativeValidationTest missing source reference (__main__.CSVWValidationTestCases) 771 | ---------------------------------------------------------------------- 772 | Traceback (most recent call last): 773 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 774 | self.assertTrue(TYPES[type]) 775 | AssertionError: False is not true 776 | 777 | ====================================================================== 778 | FAIL: test manifest-validation#test252 csvt:NegativeValidationTest missing destination reference column (__main__.CSVWValidationTestCases) 779 | ---------------------------------------------------------------------- 780 | Traceback (most recent call last): 781 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 782 | self.assertTrue(TYPES[type]) 783 | AssertionError: False is not true 784 | 785 | ====================================================================== 786 | FAIL: test manifest-validation#test253 csvt:NegativeValidationTest missing destination table (__main__.CSVWValidationTestCases) 787 | 
---------------------------------------------------------------------- 788 | Traceback (most recent call last): 789 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 790 | self.assertTrue(TYPES[type]) 791 | AssertionError: False is not true 792 | 793 | ====================================================================== 794 | FAIL: test manifest-validation#test257 csvt:NegativeValidationTest foreign key no referenced row (__main__.CSVWValidationTestCases) 795 | ---------------------------------------------------------------------- 796 | Traceback (most recent call last): 797 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 798 | self.assertTrue(TYPES[type]) 799 | AssertionError: False is not true 800 | 801 | ====================================================================== 802 | FAIL: test manifest-validation#test258 csvt:NegativeValidationTest foreign key multiple referenced rows (__main__.CSVWValidationTestCases) 803 | ---------------------------------------------------------------------- 804 | Traceback (most recent call last): 805 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 806 | self.assertTrue(TYPES[type]) 807 | AssertionError: False is not true 808 | 809 | ====================================================================== 810 | FAIL: test manifest-validation#test261 csvt:NegativeValidationTest maxLength < minLength (__main__.CSVWValidationTestCases) 811 | ---------------------------------------------------------------------- 812 | Traceback (most recent call last): 813 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 814 | self.assertTrue(TYPES[type]) 815 | AssertionError: False is not true 816 | 817 | ====================================================================== 818 | FAIL: test manifest-validation#test267 
csvt:NegativeValidationTest @id on datatype is invalid (eg starts with _:) (__main__.CSVWValidationTestCases) 819 | ---------------------------------------------------------------------- 820 | Traceback (most recent call last): 821 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 822 | self.assertTrue(TYPES[type]) 823 | AssertionError: False is not true 824 | 825 | ====================================================================== 826 | FAIL: test manifest-validation#test269 csvt:NegativeValidationTest `format` for a boolean datatype is a string but in the wrong form (eg YN) (__main__.CSVWValidationTestCases) 827 | ---------------------------------------------------------------------- 828 | Traceback (most recent call last): 829 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 830 | self.assertTrue(TYPES[type]) 831 | AssertionError: False is not true 832 | 833 | ====================================================================== 834 | FAIL: test manifest-validation#test271 csvt:NegativeValidationTest foreign key includes an invalid property (eg `dc:description`) (__main__.CSVWValidationTestCases) 835 | ---------------------------------------------------------------------- 836 | Traceback (most recent call last): 837 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 838 | self.assertTrue(TYPES[type]) 839 | AssertionError: False is not true 840 | 841 | ====================================================================== 842 | FAIL: test manifest-validation#test272 csvt:NegativeValidationTest foreign key reference includes an invalid property (eg `dc:description`) (__main__.CSVWValidationTestCases) 843 | ---------------------------------------------------------------------- 844 | Traceback (most recent call last): 845 | File 
"/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 846 | self.assertTrue(TYPES[type]) 847 | AssertionError: False is not true 848 | 849 | ====================================================================== 850 | FAIL: test manifest-validation#test278 csvt:NegativeValidationTest CSV has more headers than there are columns in the metadata (__main__.CSVWValidationTestCases) 851 | ---------------------------------------------------------------------- 852 | Traceback (most recent call last): 853 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 854 | self.assertTrue(TYPES[type]) 855 | AssertionError: False is not true 856 | 857 | ====================================================================== 858 | FAIL: test manifest-validation#test279 csvt:NegativeValidationTest duration not matching xsd pattern (__main__.CSVWValidationTestCases) 859 | ---------------------------------------------------------------------- 860 | Traceback (most recent call last): 861 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 862 | self.assertTrue(TYPES[type]) 863 | AssertionError: False is not true 864 | 865 | ====================================================================== 866 | FAIL: test manifest-validation#test280 csvt:NegativeValidationTest dayTimeDuration not matching xsd pattern (__main__.CSVWValidationTestCases) 867 | ---------------------------------------------------------------------- 868 | Traceback (most recent call last): 869 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 870 | self.assertTrue(TYPES[type]) 871 | AssertionError: False is not true 872 | 873 | ====================================================================== 874 | FAIL: test manifest-validation#test281 csvt:NegativeValidationTest yearMonthDuration not matching xsd pattern 
(__main__.CSVWValidationTestCases) 875 | ---------------------------------------------------------------------- 876 | Traceback (most recent call last): 877 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 878 | self.assertTrue(TYPES[type]) 879 | AssertionError: False is not true 880 | 881 | ====================================================================== 882 | FAIL: test manifest-validation#test286 csvt:NegativeValidationTest invalid ##0 1,234 (__main__.CSVWValidationTestCases) 883 | ---------------------------------------------------------------------- 884 | Traceback (most recent call last): 885 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 886 | self.assertTrue(TYPES[type]) 887 | AssertionError: False is not true 888 | 889 | ====================================================================== 890 | FAIL: test manifest-validation#test287 csvt:NegativeValidationTest invalid ##0 123.4 (__main__.CSVWValidationTestCases) 891 | ---------------------------------------------------------------------- 892 | Traceback (most recent call last): 893 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 894 | self.assertTrue(TYPES[type]) 895 | AssertionError: False is not true 896 | 897 | ====================================================================== 898 | FAIL: test manifest-validation#test288 csvt:NegativeValidationTest invalid #,#00 1 (__main__.CSVWValidationTestCases) 899 | ---------------------------------------------------------------------- 900 | Traceback (most recent call last): 901 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 902 | self.assertTrue(TYPES[type]) 903 | AssertionError: False is not true 904 | 905 | ====================================================================== 906 | FAIL: test manifest-validation#test289 
csvt:NegativeValidationTest invalid #,#00 1234 (__main__.CSVWValidationTestCases) 907 | ---------------------------------------------------------------------- 908 | Traceback (most recent call last): 909 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 910 | self.assertTrue(TYPES[type]) 911 | AssertionError: False is not true 912 | 913 | ====================================================================== 914 | FAIL: test manifest-validation#test290 csvt:NegativeValidationTest invalid #,#00 12,34 (__main__.CSVWValidationTestCases) 915 | ---------------------------------------------------------------------- 916 | Traceback (most recent call last): 917 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 918 | self.assertTrue(TYPES[type]) 919 | AssertionError: False is not true 920 | 921 | ====================================================================== 922 | FAIL: test manifest-validation#test291 csvt:NegativeValidationTest invalid #,#00 12,34,567 (__main__.CSVWValidationTestCases) 923 | ---------------------------------------------------------------------- 924 | Traceback (most recent call last): 925 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 926 | self.assertTrue(TYPES[type]) 927 | AssertionError: False is not true 928 | 929 | ====================================================================== 930 | FAIL: test manifest-validation#test292 csvt:NegativeValidationTest invalid #,##,#00 1 (__main__.CSVWValidationTestCases) 931 | ---------------------------------------------------------------------- 932 | Traceback (most recent call last): 933 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 934 | self.assertTrue(TYPES[type]) 935 | AssertionError: False is not true 936 | 937 | 
====================================================================== 938 | FAIL: test manifest-validation#test293 csvt:NegativeValidationTest invalid #,##,#00 1234 (__main__.CSVWValidationTestCases) 939 | ---------------------------------------------------------------------- 940 | Traceback (most recent call last): 941 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 942 | self.assertTrue(TYPES[type]) 943 | AssertionError: False is not true 944 | 945 | ====================================================================== 946 | FAIL: test manifest-validation#test294 csvt:NegativeValidationTest invalid #,##,#00 12,34 (__main__.CSVWValidationTestCases) 947 | ---------------------------------------------------------------------- 948 | Traceback (most recent call last): 949 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 950 | self.assertTrue(TYPES[type]) 951 | AssertionError: False is not true 952 | 953 | ====================================================================== 954 | FAIL: test manifest-validation#test295 csvt:NegativeValidationTest invalid #,##,#00 1,234,567 (__main__.CSVWValidationTestCases) 955 | ---------------------------------------------------------------------- 956 | Traceback (most recent call last): 957 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 958 | self.assertTrue(TYPES[type]) 959 | AssertionError: False is not true 960 | 961 | ====================================================================== 962 | FAIL: test manifest-validation#test296 csvt:NegativeValidationTest invalid #0.# 12.34 (__main__.CSVWValidationTestCases) 963 | ---------------------------------------------------------------------- 964 | Traceback (most recent call last): 965 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 966 | 
self.assertTrue(TYPES[type]) 967 | AssertionError: False is not true 968 | 969 | ====================================================================== 970 | FAIL: test manifest-validation#test297 csvt:NegativeValidationTest invalid #0.# 1,234.5 (__main__.CSVWValidationTestCases) 971 | ---------------------------------------------------------------------- 972 | Traceback (most recent call last): 973 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 974 | self.assertTrue(TYPES[type]) 975 | AssertionError: False is not true 976 | 977 | ====================================================================== 978 | FAIL: test manifest-validation#test298 csvt:NegativeValidationTest invalid #0.0 1 (__main__.CSVWValidationTestCases) 979 | ---------------------------------------------------------------------- 980 | Traceback (most recent call last): 981 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 982 | self.assertTrue(TYPES[type]) 983 | AssertionError: False is not true 984 | 985 | ====================================================================== 986 | FAIL: test manifest-validation#test299 csvt:NegativeValidationTest invalid #0.0 12.34 (__main__.CSVWValidationTestCases) 987 | ---------------------------------------------------------------------- 988 | Traceback (most recent call last): 989 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 990 | self.assertTrue(TYPES[type]) 991 | AssertionError: False is not true 992 | 993 | ====================================================================== 994 | FAIL: test manifest-validation#test300 csvt:NegativeValidationTest invalid #0.0# 1 (__main__.CSVWValidationTestCases) 995 | ---------------------------------------------------------------------- 996 | Traceback (most recent call last): 997 | File 
"/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 998 | self.assertTrue(TYPES[type]) 999 | AssertionError: False is not true 1000 | 1001 | ====================================================================== 1002 | FAIL: test manifest-validation#test301 csvt:NegativeValidationTest invalid #0.0# 12.345 (__main__.CSVWValidationTestCases) 1003 | ---------------------------------------------------------------------- 1004 | Traceback (most recent call last): 1005 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 1006 | self.assertTrue(TYPES[type]) 1007 | AssertionError: False is not true 1008 | 1009 | ====================================================================== 1010 | FAIL: test manifest-validation#test302 csvt:NegativeValidationTest invalid #0.0#,# 1 (__main__.CSVWValidationTestCases) 1011 | ---------------------------------------------------------------------- 1012 | Traceback (most recent call last): 1013 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 1014 | self.assertTrue(TYPES[type]) 1015 | AssertionError: False is not true 1016 | 1017 | ====================================================================== 1018 | FAIL: test manifest-validation#test303 csvt:NegativeValidationTest invalid #0.0#,# 12.345 (__main__.CSVWValidationTestCases) 1019 | ---------------------------------------------------------------------- 1020 | Traceback (most recent call last): 1021 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 1022 | self.assertTrue(TYPES[type]) 1023 | AssertionError: False is not true 1024 | 1025 | ====================================================================== 1026 | FAIL: test manifest-validation#test304 csvt:NegativeValidationTest invalid #0.0#,# 12.34,567 (__main__.CSVWValidationTestCases) 1027 | 
---------------------------------------------------------------------- 1028 | Traceback (most recent call last): 1029 | File "/home/sebastian/Repositories/csvw-parser/test/csvw_validation_test_cases.py", line 71, in test 1030 | self.assertTrue(TYPES[type]) 1031 | AssertionError: False is not true 1032 | 1033 | ---------------------------------------------------------------------- 1034 | Ran 276 tests in 139.484s 1035 | 1036 | FAILED (failures=123, errors=4) 1037 | -------------------------------------------------------------------------------- /test/metadata_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 4 | os.sys.path.insert(0,parentdir) 5 | import pprint 6 | from pycsvw import metadata 7 | from pycsvw.metadata import Model 8 | 9 | __author__ = 'sebastian' 10 | 11 | import unittest 12 | 13 | 14 | class DanBrickleyCase(unittest.TestCase): 15 | @unittest.skip("@context appears to be not up-to-date") 16 | def test_dan_brickley(self): 17 | testfile = 'csvwparser/testdata/csvw-template/example.csv' 18 | metafile = 'csvwparser/testdata/csvw-template/example.csv-metadata.json' 19 | csvw = CSVW(path=testfile, metadata_path=metafile) 20 | self.assertNotEqual(csvw, None) 21 | self.assertNotEqual(csvw.metadata, None) 22 | title = csvw.metadata['dc:title'] 23 | self.assertEqual(title, "My Spreadsheet") 24 | # TODO write tests 25 | 26 | def test_positive_context(self): 27 | A = { 28 | "@context": [ "http://www.w3.org/ns/csvw", { "@language": "en" } ], 29 | "tables": [{ 30 | "url": "http://example.org/countries.csv", 31 | "tableSchema": { 32 | "columns": [{ 33 | "name": "countryCode", 34 | "datatype": "string", 35 | "propertyUrl": "http://www.geonames.org/ontology{#_name}" 36 | }, { 37 | "name": "latitude", 38 | "datatype": "number" 39 | }, { 40 | "name": "longitude", 41 | "datatype": "number" 42 | }, { 43 | "name": "name", 44 
| "datatype": "string" 45 | }], 46 | "aboutUrl": "http://example.org/countries.csv{#countryCode}", 47 | "propertyUrl": "http://schema.org/{_name}", 48 | "primaryKey": "countryCode" 49 | } 50 | }] 51 | } 52 | result = metadata.validate(A) 53 | self.assertTrue(isinstance(result, Model)) 54 | # context is string only 55 | A = { 56 | "@context": "http://www.w3.org/ns/csvw", 57 | "tables": [{ 58 | "url": "http://example.org/countries.csv", 59 | "tableSchema": { 60 | "columns": [{ 61 | "name": "countryCode", 62 | "datatype": "string", 63 | "propertyUrl": "http://www.geonames.org/ontology{#_name}" 64 | }, { 65 | "name": "latitude", 66 | "datatype": "number" 67 | }, { 68 | "name": "longitude", 69 | "datatype": "number" 70 | }, { 71 | "name": "name", 72 | "datatype": "string" 73 | }], 74 | "aboutUrl": "http://example.org/countries.csv{#countryCode}", 75 | "propertyUrl": "http://schema.org/{_name}", 76 | "primaryKey": "countryCode" 77 | } 78 | }] 79 | } 80 | result = metadata.validate(A) 81 | self.assertTrue(isinstance(result, Model)) 82 | 83 | def test_negative_context(self): 84 | # context is missing 85 | A = { 86 | "tables": [{ 87 | "url": "http://example.org/countries.csv", 88 | "tableSchema": { 89 | "columns": [{ 90 | "name": "countryCode", 91 | "datatype": "string", 92 | "propertyUrl": "http://www.geonames.org/ontology{#_name}" 93 | }, { 94 | "name": "latitude", 95 | "datatype": "number" 96 | }, { 97 | "name": "longitude", 98 | "datatype": "number" 99 | }, { 100 | "name": "name", 101 | "datatype": "string" 102 | }], 103 | "aboutUrl": "http://example.org/countries.csv{#countryCode}", 104 | "propertyUrl": "http://schema.org/{_name}", 105 | "primaryKey": "countryCode" 106 | } 107 | }] 108 | } 109 | result = metadata.validate(A) 110 | self.assertFalse(result) 111 | # wrong context 112 | A = { 113 | "@context": [ "http://www.w3.org/ns/csvw", { "somethingwrong": "en" } ], 114 | "tables": [{ 115 | "url": "http://example.org/countries.csv", 116 | "tableSchema": { 117 | 
"columns": [{ 118 | "name": "countryCode", 119 | "datatype": "string", 120 | "propertyUrl": "http://www.geonames.org/ontology{#_name}" 121 | }, { 122 | "name": "latitude", 123 | "datatype": "number" 124 | }, { 125 | "name": "longitude", 126 | "datatype": "number" 127 | }, { 128 | "name": "name", 129 | "datatype": "string" 130 | }], 131 | "aboutUrl": "http://example.org/countries.csv{#countryCode}", 132 | "propertyUrl": "http://schema.org/{_name}", 133 | "primaryKey": "countryCode" 134 | } 135 | }] 136 | } 137 | result = metadata.validate(A) 138 | self.assertFalse(result) 139 | # wrong context 140 | A = { 141 | "@context": "http://www.w3.org/ns/csv", 142 | "tables": [{ 143 | "url": "http://example.org/countries.csv", 144 | "tableSchema": { 145 | "columns": [{ 146 | "name": "countryCode", 147 | "datatype": "string", 148 | "propertyUrl": "http://www.geonames.org/ontology{#_name}" 149 | }, { 150 | "name": "latitude", 151 | "datatype": "number" 152 | }, { 153 | "name": "longitude", 154 | "datatype": "number" 155 | }, { 156 | "name": "name", 157 | "datatype": "string" 158 | }], 159 | "aboutUrl": "http://example.org/countries.csv{#countryCode}", 160 | "propertyUrl": "http://schema.org/{_name}", 161 | "primaryKey": "countryCode" 162 | } 163 | }] 164 | } 165 | result = metadata.validate(A) 166 | self.assertFalse(result) 167 | 168 | def test_negative_validate(self): 169 | # url is missing 170 | A = { 171 | "@context": [ "http://www.w3.org/ns/csvw", { "@language": "en" } ], 172 | "tables": [{ 173 | # "url": "http://example.org/countries.csv", 174 | "tableSchema": { 175 | "columns": [{ 176 | "name": "countryCode", 177 | "datatype": "string", 178 | "propertyUrl": "http://www.geonames.org/ontology{#_name}" 179 | }, { 180 | "name": "latitude", 181 | "datatype": "number" 182 | }, { 183 | "name": "longitude", 184 | "datatype": "number" 185 | }, { 186 | "name": "name", 187 | "datatype": "string" 188 | }], 189 | "aboutUrl": "http://example.org/countries.csv{#countryCode}", 190 | 
"propertyUrl": "http://schema.org/{_name}", 191 | "primaryKey": "countryCode" 192 | } 193 | }] 194 | } 195 | result = metadata.validate(A) 196 | self.assertFalse(result) 197 | 198 | def test_positive_validate(self): 199 | self.maxDiff = None 200 | A = { 201 | "@context": "http://www.w3.org/ns/csvw", 202 | "tables": [{ 203 | "url": "http://example.org/countries.csv", 204 | "tableSchema": { 205 | "columns": [{ 206 | "name": "countryCode", 207 | "datatype": "string", 208 | "propertyUrl": "http://www.geonames.org/ontology{#_name}" 209 | }, { 210 | "name": "latitude", 211 | "datatype": "number" 212 | }, { 213 | "name": "longitude", 214 | "datatype": "number" 215 | }, { 216 | "name": "name", 217 | "datatype": "string" 218 | }], 219 | "aboutUrl": "http://example.org/countries.csv{#countryCode}", 220 | "propertyUrl": "http://schema.org/{_name}", 221 | "primaryKey": "countryCode" 222 | } 223 | }, { 224 | "url": "http://example.org/country_slice.csv", 225 | "tableSchema": { 226 | "columns": [{ 227 | "name": "countryRef", 228 | "valueUrl": "http://example.org/countries.csv{#countryRef}" 229 | }, { 230 | "name": "year", 231 | "datatype": "gYear" 232 | }, { 233 | "name": "population", 234 | "datatype": "integer" 235 | }], 236 | "foreignKeys": [{ 237 | "columnReference": "countryRef", 238 | "reference": { 239 | "resource": "http://example.org/countries.csv", 240 | "columnReference": "countryCode" 241 | } 242 | }] 243 | } 244 | }] 245 | } 246 | result = metadata.validate(A) 247 | self.assertTrue(isinstance(result, Model)) 248 | json_res = result.json() 249 | print json_res 250 | self.assertEqual(json_res, A) 251 | 252 | def test_normalize(self): 253 | self.maxDiff = None 254 | A = { 255 | "@context": [ "http://www.w3.org/ns/csvw", { "@language": "en" } ], 256 | "@type": "Table", 257 | "url": "http://example.com/table.csv", 258 | "dc:title": [ 259 | "The title of this Table", 260 | {"@value": "Der Titel dieser Tabelle", "@language": "de"} 261 | ] 262 | } 263 | norm = { 264 | 
"@context": "http://www.w3.org/ns/csvw", 265 | "tables": [ 266 | { 267 | "@type": "Table", 268 | "url": "http://example.com/table.csv", 269 | "dc:title": [ 270 | {"@value": "The title of this Table", "@language": "en"}, 271 | {"@value": "Der Titel dieser Tabelle", "@language": "de"} 272 | ] 273 | } 274 | ], 275 | } 276 | val = metadata.validate(A) 277 | #print val.json() 278 | #self.assertEqual(val.json(), A) 279 | val.normalize() 280 | json_res = val.json() 281 | print json_res 282 | self.assertEqual(json_res, norm) 283 | 284 | def test_normalize2(self): 285 | self.maxDiff = None 286 | A = { 287 | "@context": [ "http://www.w3.org/ns/csvw", { "@base": "http://example.com/" } ], 288 | "@type": "Table", 289 | "url": "table.csv", 290 | "schema:url": {"@id": "table.csv"} 291 | } 292 | norm = { 293 | "@context": "http://www.w3.org/ns/csvw", 294 | "tables": [ 295 | { 296 | "@type": "Table", 297 | "url": "http://example.com/table.csv", 298 | "schema:url": {"@id": "http://example.com/table.csv"} 299 | } 300 | ] 301 | } 302 | val = metadata.validate(A) 303 | #print val.json() 304 | #self.assertEqual(val.json(), A) 305 | val.normalize() 306 | json_res = val.json() 307 | print json_res 308 | self.assertEqual(json_res, norm) 309 | 310 | def test_merge(self): 311 | self.maxDiff = None 312 | A = { 313 | "@context": ["http://www.w3.org/ns/csvw", {"@language": "en", 314 | "@base": "http://example.com/"} 315 | ], 316 | "tables": [{ 317 | "url": "doc1.csv", 318 | "dc:title": "foo", 319 | "tableDirection": "ltr", 320 | "tableSchema": { 321 | "aboutUrl": "{#foo}", 322 | "columns": [{ 323 | "name": "foo", 324 | "titles": "Foo", 325 | "required": True 326 | }, { 327 | "name": "bar" 328 | }] 329 | } 330 | }, { 331 | "url": "doc2.csv" 332 | }] 333 | } 334 | 335 | B = { 336 | "@context": "http://www.w3.org/ns/csvw", 337 | "url": "http://example.com/doc1.csv", 338 | "dc:description": "bar", 339 | "tableSchema": { 340 | "propertyUrl": "{#_name}", 341 | "columns": [{ 342 | "titles": "Foo", 
343 | "required": False 344 | }, { 345 | "name": "bar" 346 | }, { 347 | }] 348 | } 349 | } 350 | 351 | merged = { 352 | "@context": "http://www.w3.org/ns/csvw", 353 | "tables": [{ 354 | "url": "http://example.com/doc1.csv", 355 | "dc:title": {"@value": "foo", "@language": "en"}, 356 | "dc:description": {"@value": "bar"}, 357 | "tableDirection": "ltr", 358 | "tableSchema": { 359 | "aboutUrl": "{#foo}", 360 | "propertyUrl": "{#_name}", 361 | "columns": [{ 362 | "name": "foo", 363 | "titles": { "en": [ "Foo" ]}, 364 | "required": True 365 | },{ 366 | "name": "bar" 367 | }] 368 | } 369 | }, { 370 | "url": "http://example.com/doc2.csv" 371 | }] 372 | } 373 | 374 | # normalizing a 375 | #norm_a = metadata.normalize(A) 376 | #norm_b = metadata.normalize(B) 377 | #pprint.pprint(norm_a.json()) 378 | #pprint.pprint(norm_b.json()) 379 | 380 | result = metadata.merge([A, B]) 381 | pprint.pprint(result.json()) 382 | self.assertEqual(merged, result.json()) 383 | 384 | 385 | if __name__ == '__main__': 386 | unittest.main() 387 | -------------------------------------------------------------------------------- /test/validator_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | import os 4 | parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 5 | os.sys.path.insert(0,parentdir) 6 | from pycsvw import validator 7 | 8 | 9 | class TestCsvValidator(unittest.TestCase): 10 | 11 | def test_validate_csv_pass(self): 12 | csvPath = os.path.join(parentdir, r"pycsvw/testdata/csvw-template/example.csv") 13 | schemaPath = os.path.join(parentdir, r"pycsvw/testdata/csvw-template/example.csv-metadata.json") 14 | (ret, error_message) = validator.validate_file(csvPath, schemaPath) 15 | self.assertTrue(ret) 16 | 17 | def test_validate_csv_column_missing(self): 18 | csvPath = os.path.join(parentdir, "pycsvw/testdata/tree-ops.csv") 19 | schemaPath = os.path.join(parentdir, 
r"pycsvw/testdata/test124-user-metadata.json") 20 | expectedResultPath = os.path.join(parentdir, r"pycsvw/testdata/validate-result-missing-column.txt") 21 | 22 | with open(expectedResultPath, 'r') as myfile: 23 | expectedResult=myfile.read() 24 | 25 | (ret, error_message) = validator.validate_file(csvPath, schemaPath) 26 | self.assertEqual(expectedResult.rstrip(), error_message.rstrip()) 27 | 28 | def test_validate_csv_required_missing(self): 29 | csvPath = os.path.join(parentdir, "pycsvw/testdata/test125.csv") 30 | schemaPath = os.path.join(parentdir, r"pycsvw/testdata/test125-metadata.json") 31 | expectedResultPath = os.path.join(parentdir, r"pycsvw/testdata/validate-result-required-fail.txt") 32 | 33 | with open(expectedResultPath, 'r') as myfile: 34 | expectedResult=myfile.read() 35 | 36 | (ret, error_message) = validator.validate_file(csvPath, schemaPath) 37 | self.assertEqual(expectedResult.rstrip(), error_message.rstrip()) 38 | 39 | def test_validate_csv_primary_key_fail(self): 40 | csvPath = os.path.join(parentdir, "pycsvw/testdata/test234.csv") 41 | schemaPath = os.path.join(parentdir, r"pycsvw/testdata/test234-metadata.json") 42 | expectedResultPath = os.path.join(parentdir, r"pycsvw/testdata/validate-result-primary-key-fail.txt") 43 | 44 | with open(expectedResultPath, 'r') as myfile: 45 | expectedResult=myfile.read() 46 | 47 | (ret, error_message) = validator.validate_file(csvPath, schemaPath) 48 | self.assertEqual(expectedResult.rstrip(), error_message.rstrip()) 49 | 50 | if __name__ == '__main__': 51 | unittest.main() 52 | --------------------------------------------------------------------------------