├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── flexidate ├── __init__.py └── test_flexidate.py ├── pyproject.toml └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Custom ignores 2 | .*.swp 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # SageMath parsed files 92 | *.sage.py 93 | 94 | # Environments 95 | .env 96 | .venv 97 | env/ 98 | venv/ 99 | ENV/ 100 | env.bak/ 101 | venv.bak/ 102 | 103 | # Spyder project settings 104 | .spyderproject 105 | .spyproject 106 | 107 | # Rope 
project settings 108 | .ropeproject 109 | 110 | # mkdocs documentation 111 | /site 112 | 113 | # mypy 114 | .mypy_cache/ 115 | .dmypy.json 116 | dmypy.json 117 | 118 | # Pyre type checker 119 | .pyre/ 120 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | - "3.3" 6 | - "3.4" 7 | - "3.5" 8 | - "3.6" 9 | - "3.7" 10 | 11 | before_script: 12 | - pip install --upgrade pip 13 | - pip install -r requirements.txt 14 | # - pip install codecov 15 | # - pip install datetime 16 | # - pip install python-dateutil 17 | 18 | script: 19 | # Notes on nose: 20 | # Travis CI pre-installs `nose` 21 | # https://github.com/coagulant/coveralls-python#nosetests 22 | # http://nose.readthedocs.org/en/latest/plugins/skip.html 23 | - nosetests --no-skip --with-coverage test_flexidate.py 24 | 25 | #after_success: 26 | # - codecov 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Open Knowledge 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # About 5 | 6 | The `flexidate` library supports date parsing and normalization using the `FlexiDate` class. It provides functionality to: 7 | 8 | 1. Cast dates according to the [Anno Domini](https://en.wikipedia.org/wiki/Anno_Domini) notation system (e.g., 399 BC, AD 417) as well as the [Common Era](https://en.wikipedia.org/wiki/Common_Era) notation system (e.g., 399 B.C.E, 417 CE) 9 | 1. Handle dates before AD 1 or 1 CE 10 | 1. Cast imprecise dates (c.1860, 18??, fl. 1534, etc) 11 | 1. Normalize dates to machine-readable data types 12 | 1. Create sortable date objects 13 | 14 | Flexidate builds on the excellent [dateutil](https://dateutil.readthedocs.org/en/latest/). 15 | 16 | For more information see [this blog post](http://www.rufuspollock.org/2009/06/18/flexible-dates-in-python/). 
17 | 18 | 19 | # Examples 20 | 21 | First load a string into the `parse` function, which returns a `FlexiDate` object: 22 | 23 | ``` python 24 | >>> from flexidate import parse 25 | >>> fd = parse('Jan 1890') 26 | ``` 27 | 28 | Once you have your date in a `FlexiDate` object, you can get access to attributes: 29 | 30 | ``` python 31 | >>> fd.year # u'1890' 32 | '1890' 33 | >>> fd.month # u'01' 34 | '01' 35 | ``` 36 | 37 | Note how all fields are retained as strings, which prevents the loss of original input data. 38 | 39 | The `FlexiDate` object exports to other formats (e.g., `float` or `datetime`): 40 | 41 | ``` python 42 | >>> fd.as_float() 43 | 1890.0833333333333 44 | >>> fd.as_datetime() 45 | datetime.datetime(1890, 1, 1, 0, 0) 46 | ``` 47 | 48 | 49 | To cast years before AD 1: 50 | 51 | To cast dates before Christ (BC in Anno Domini notation, BCE in Common Era notation): 52 | 53 | ``` python 54 | >>> fd = parse('399 BC') 55 | >>> fd 56 | -0399 57 | >>> fd.year 58 | '-0399' 59 | ``` 60 | 61 | Or after: 62 | ``` python 63 | >>> fd = parse('AD 417') 64 | >>> fd 65 | 0417 66 | ``` 67 | 68 | Including with Common Era notation: 69 | ``` python 70 | >>> fd_ce = parse('399 BCE') 71 | >>> fd_ce 72 | -0399 73 | >>> fd_ad = parse('399 BC') 74 | >>> fd_ce.year == fd_ad.year 75 | True 76 | ``` 77 | 78 | ``` python 79 | >>> fd_ce = parse('417 CE') 80 | >>> fd_ce 81 | 0417 82 | >>> fd_ad = parse('AD 417') 83 | >>> fd_ce.year == fd_ad.year 84 | True 85 | ``` 86 | 87 | `FlexiDate` supports hour, minute, second, and microsecond (note that `parse` defaults to `dayfirst=True`, so the `06` below is read as the day): 88 | 89 | ``` python 90 | >>> fd = parse('2016-06-01 10') 91 | 2016-01-06 10 92 | >>> fd.hour 93 | '10' 94 | >>> fd.minute 95 | '' 96 | ``` 97 | 98 | `parse` can capture various fuzzy date attributes. In `FlexiDate` this becomes available as the attribute `qualifier`: 99 | 100 | ``` python 101 | >>> fd = parse('417?') 102 | >>> fd 103 | [b'UNPARSED: 417?'] 104 | >>> fd.qualifier 105 | b'UNPARSED: 417?' 106 | ``` 107 | 108 | ``` python 109 | >>> fd = parse('c. 
import re
import datetime
# dateutil is optional: without it free-text strings cannot be interpreted and
# `parse` falls back to a FlexiDate that only carries an "UNPARSED" qualifier.
try:
    import dateutil.parser
    dateutil_parser = dateutil.parser.parser()
except ImportError:
    dateutil_parser = None
import sys


class FlexiDate(object):
    """Store dates as strings and present them in a slightly extended version
    of ISO8601.

    Modifications:
        * Allow a trailing qualifier e.g. fl.
        * Allow replacement of unknown values by ? e.g. if sometime in 1800s
          can do 18??

    Restriction on ISO8601:
        * Truncation (e.g. of centuries) is *not* permitted.
        * No week and day representation e.g. 1999-W01

    Every component is kept as a zero-padded string ('' when absent) so the
    original input is never lost to numeric conversion.
    """

    def __init__(self, year=None, month=None, day=None, hour=None, minute=None, second=None, microsecond=None, qualifier=''):
        """Build a date from optional components.

        Each component may be an int or a string; `None` means "not given"
        and is stored as ''. `qualifier` is free text stored unchanged.
        """
        # force = month or day or qualifier
        force = False
        self.year = self._cvt(year, rjust=4, force=force)
        self.month = self._cvt(month)
        self.day = self._cvt(day)
        self.hour = self._cvt(hour)
        self.minute = self._cvt(minute)
        self.second = self._cvt(second)
        self.microsecond = self._cvt(microsecond)
        self.qualifier = qualifier

    def _cvt(self, val, rjust=2, force=False):
        """Return `val` as a string left-padded with '0' to `rjust` chars.

        A leading '-' is kept in front of the padding. `None` gives '' unless
        `force` is true, in which case `rjust` '!' characters are returned.
        """
        # Compare against None (not truthiness) so that 0 minutes/seconds
        # are kept as '00' rather than dropped.
        if val is not None:
            tmp = str(val).strip()
            if tmp.startswith('-'):
                tmp = '-' + tmp[1:].rjust(rjust, '0')
            else:
                tmp = tmp.rjust(rjust, '0')
            return tmp
        elif force:
            # use '!' rather than '?' as '!' < '1' while '?' > '1'
            return rjust * '!'
        else:
            return ''

    def __str__(self):
        out = self.isoformat()
        if self.qualifier:
            # leading space is important as ensures when no year sort in right
            # order as ' ' < '1'
            out += u' [%s]' % self.qualifier
        return out

    def __repr__(self):
        return u'%s %s' % (self.__class__, self.__str__())

    def isoformat(self, strict=False):
        '''Return date in isoformat (same as __str__ but without qualifier).

        The date part stops at the first missing component; the time part is
        only emitted when an hour is present.

        WARNING: does not replace '?' in dates unless strict=True (and even
        then only in the year-month-day part).
        '''
        out = self.year
        # what do we do when no year ...
        for val in [self.month, self.day]:
            if not val:
                break
            out += u'-' + val
        if strict:
            out = out.replace('?', '0')

        if self.hour:
            out += u' '
            out += self.hour
            for val in [self.minute, self.second]:
                if not val:
                    break
                out += u':' + val
            if self.microsecond:
                out += u'.' + self.microsecond
        return out

    # Accepts everything `__str__` can emit ("YYYY-MM-DD HH:MM:SS.ffffff
    # [qualifier]", each part optional) plus the legacy all-dashes spelling
    # ("YYYY-MM-DD-HH-MM-SS-ff") that earlier versions of this pattern read.
    our_re_pat = r'''
        (?P<year> -?[\d?]+)?
        (?:
            \s* - (?P<month> [\d?]{1,2})
            (?: \s* - (?P<day> [\d?]{1,2}) )?
        )?
        (?:
            (?: \s* - | \s+ | T ) (?P<hour> [\d?]{1,2})
            (?:
                \s* [-:] (?P<minute> [\d?]{1,2})
                (?: \s* [-:] (?P<second> [\d?]{1,2}) )?
            )?
            (?: \s* [-.] (?P<microsecond> [\d?]+) )?
        )?
        \s*
        (?: \[ (?P<qualifier> [^]]*) \])?
        '''
    our_re = re.compile(our_re_pat, re.VERBOSE)

    @classmethod
    def from_str(cls, instr):
        '''Undo affect of __str__.

        @param instr: a string as produced by `__str__` / `isoformat`.
        @return: a FlexiDate; an empty FlexiDate for empty input; None when
            `instr` contains neither a date component nor a qualifier.
        '''
        if not instr:
            return cls()

        found = cls.our_re.match(instr)
        parts = found.groupdict() if found else {}
        qualifier = parts.pop('qualifier', None)
        # Every piece of the pattern is optional, so "nothing captured" is
        # how unparseable input shows up.  TODO: raise Exception?
        if qualifier is None and not any(parts.values()):
            return None
        # Normalise a missing qualifier to '' to agree with the constructor
        # default.
        return cls(qualifier=qualifier or '', **parts)

    def as_float(self):
        '''Get as a float (year being the integer part).

        Replace '?' in year with 9 so as to be conservative (e.g. 19?? becomes
        1999) and elsewhere (month, day) with 0

        @return: float, or None when there is no year.
        '''
        if not self.year:
            return None
        out = float(self.year.replace('?', '9'))
        if self.month:
            # TODO: we are assuming months are of equal length
            out += float(self.month.replace('?', '0')) / 12.0
            if self.day:
                out += float(self.day.replace('?', '0')) / 365.0
        return out

    def as_datetime(self):
        '''Get as python datetime.datetime.

        Require year to be a valid datetime year. Default month and day to 1
        and all time fields to 0 if they do not exist.

        @return: datetime.datetime object.
        @raise ValueError: if a component is not an integer (e.g. empty year
            or a '?' wildcard) or is outside the range datetime accepts.
        '''
        year = int(self.year)
        month = int(self.month) if self.month else 1
        day = int(self.day) if self.day else 1
        hour = int(self.hour) if self.hour else 0
        minute = int(self.minute) if self.minute else 0
        second = int(self.second) if self.second else 0
        microsecond = int(self.microsecond) if self.microsecond else 0
        return datetime.datetime(year, month, day, hour, minute, second, microsecond)


def parse(date, dayfirst=True):
    '''Parse a `date` into a `FlexiDate`.

    @param date: the date to parse - may be a string, int (taken as a year),
        datetime.date, datetime.datetime or FlexiDate.
    @param dayfirst: passed to dateutil for strings; when true an ambiguous
        '05/07/2010' is read as 5 July.
    @return: a FlexiDate; None for falsy input. A string that cannot be
        parsed yields a FlexiDate with no date fields and a qualifier of
        'UNPARSED: <input>'.

    TODO: support for quarters e.g. Q4 1980 or 1954 Q3
    TODO: support latin stuff like M.DCC.LIII
    TODO: convert '-' to '?' when used that way
        e.g. had this date [181-]
    '''
    if not date:
        return None
    if isinstance(date, FlexiDate):
        return date
    if isinstance(date, int):
        return FlexiDate(year=date)
    # datetime.datetime is a subclass of datetime.date, so test it first.
    if isinstance(date, datetime.datetime):
        return PythonDateTimeParser().parse(date)
    if isinstance(date, datetime.date):
        return PythonDateParser().parse(date)

    # assuming its a string
    out = DateutilDateParser().parse(date, dayfirst=dayfirst)
    if out is not None:
        return out
    # msg = 'Unable to parse %s' % date
    # raise ValueError(date)
    val = 'UNPARSED: %s' % date
    # NOTE(review): on Python 3 this leaves a *bytes* qualifier (the README
    # documents b'UNPARSED: ...'); kept as-is for backward compatibility.
    val = val.encode('ascii', 'ignore')
    return FlexiDate(qualifier=val)


class DateParserBase(object):
    """Interface shared by the parsers: `parse` builds a FlexiDate, `norm`
    returns its string form."""

    def parse(self, date):
        raise NotImplementedError

    def norm(self, date):
        return str(self.parse(date))


class PythonDateParser(DateParserBase):
    """Convert a datetime.date into a FlexiDate with a zeroed time part."""

    def parse(self, date):
        return FlexiDate(date.year, date.month, date.day, 0, 0, 0, 0)


class PythonDateTimeParser(DateParserBase):
    """Convert a datetime.datetime into a FlexiDate, keeping the time."""

    def parse(self, date):
        return FlexiDate(date.year, date.month, date.day, date.hour, date.minute, date.second, date.microsecond)


class DateutilDateParser(DateParserBase):
    """Parse free-text date strings with dateutil, adding support for BC/BCE
    and AD/CE markers, 'circa' prefixes and trailing '?' uncertainty."""

    _numeric = re.compile("^[0-9]+$")
    # 'c.1950' or 'c1950', optionally preceded by non-letters
    _circa = re.compile(r'([^a-zA-Z]*)c\.?\s*(\d+.*)')
    # 'p1980' (appears in field 008 of MARC records)
    _p_prefix = re.compile(r'^p(\d+)')
    # '1985?'
    _uncertain = re.compile(r'([0-9xX]{4})\?')
    # A stand-alone 'CE'; a blind str.replace would also rewrite the 'CE'
    # inside words such as 'DECEMBER'.
    _bare_ce = re.compile(r'(?<![A-Za-z])CE(?![A-Za-z])')
    # Order matters: longer spellings must be rewritten before their
    # substrings.
    _era_aliases = (
        ('B.C.E.', 'BC'),
        ('BCE', 'BC'),
        ('B.C.', 'BC'),
        ('A.D.', 'AD'),
        ('C.E.', 'AD'),
    )

    def parse(self, date, **kwargs):
        '''Parse the string `date` into a FlexiDate.

        :param **kwargs: any kwargs accepted by dateutil.parse function.
        :return: a FlexiDate, or None when dateutil is unavailable or cannot
            make sense of the input.
        '''
        if dateutil_parser is None:
            return None
        qualifiers = []
        date = orig_date = date.strip()

        # various normalizations
        # TODO: call .lower() first
        for alias, canonical in self._era_aliases:
            date = date.replace(alias, canonical)
        date = self._bare_ce.sub('AD', date)

        # deal with pre 0AD dates
        pre0AD = date.startswith('-') or 'BC' in date
        # BC seems to mess up parser
        date = date.replace('BC', '')

        # deal with circa: 'c.1950' or 'c1950'
        circa_match = self._circa.match(date)
        if circa_match:
            # remove circa bit
            qualifiers.append("Note 'circa'")
            date = ''.join(circa_match.groups())

        # deal with p1980 (what does this mean? it can appear in
        # field 008 of MARC records
        if self._p_prefix.match(date):
            date = date[1:]

        # Deal with uncertainty: '1985?'
        uncertainty_match = self._uncertain.match(date)
        if uncertainty_match:
            # remove the matched '?' itself (not merely the last character,
            # which may be something else when text follows)
            date = uncertainty_match.group(1) + date[uncertainty_match.end():]
            qualifiers.append('Uncertainty')

        # Parse the numbers intelligently
        # do not use std parser function as creates lots of default data
        # NOTE(review): `_parse` is private dateutil API; depending on the
        # dateutil version it returns either a result object or a
        # (result, skipped_tokens) tuple.
        try:
            res = dateutil_parser._parse(date, **kwargs)
        except (ValueError, OverflowError):
            return None
        if isinstance(res, tuple):
            res = res[0]
        if res is None:
            # Couldn't parse it
            return None

        # Note: Years of less than 3 digits not interpreted by
        # dateutil correctly
        # e.g. 87 -> 1987
        # 4 -> day 4 (no year)
        # Both cases are handled in this routine
        day = res.day
        if res.year is None and res.day:
            # the lone number was the year, so it must not also be the day
            year = res.day
            day = None
        # If the whole date is simply two digits then dateutil_parser makes
        # it '86' -> '1986'. So strip off the '19'. (If the date specified
        # day/month then a two digit year is more likely to be this century
        # and so allow the '19' prefix to it.)
        elif (res.year is not None and self._numeric.match(date)
                and (len(date) == 2 or date.startswith('00'))):
            year = res.year % 100
        else:
            year = res.year

        # finally add back in BC stuff
        if pre0AD and year is not None:
            year = -year

        if not qualifiers:
            qualifier = ''
        else:
            qualifier = ', '.join(qualifiers) + (' : %s' % orig_date)
        return FlexiDate(year, res.month, day, res.hour, res.minute, res.second, res.microsecond, qualifier=qualifier)
11 | def test_ordering(self): 12 | assert '0' < '1' 13 | assert '-10' < '10' 14 | assert '-' < '@' 15 | assert '-' < '0' 16 | assert '-100' < '-X10' 17 | assert '10' < '1000' 18 | assert '02000' < '10000' 19 | assert ' 2000' < '10000' 20 | 21 | def test_bad_ordering(self): 22 | assert ' ' < '0' 23 | assert ' ' < '-' 24 | assert not '-' < '+' 25 | assert '-100' > '-10' 26 | assert not '-100' < '-010' 27 | assert not '-100' < '- 10' 28 | assert not '-100' < ' -10' 29 | assert '10000' < '2000' 30 | assert not '-10' < ' 1' 31 | 32 | 33 | class TestFlexiDate(unittest.TestCase): 34 | def test_init(self): 35 | fd = FlexiDate() 36 | assert fd.year == '', fd 37 | assert fd.month == '', fd 38 | 39 | fd = FlexiDate(2000, 1, 1) 40 | assert fd.month == '01', fd 41 | assert fd.day == '01', fd 42 | 43 | fd = FlexiDate(2004, 3, 2, 10) 44 | assert fd.month == '03', fd 45 | assert fd.day == '02', fd 46 | assert fd.hour == '10', fd 47 | 48 | fd = FlexiDate(2004, 3, 2, 10, 11) 49 | assert fd.month == '03', fd 50 | assert fd.day == '02', fd 51 | assert fd.hour == '10', fd 52 | assert fd.minute == '11', fd 53 | 54 | fd = FlexiDate(2004, 3, 2, 10, 11, 12) 55 | assert fd.month == '03', fd 56 | assert fd.day == '02', fd 57 | assert fd.hour == '10', fd 58 | assert fd.minute == '11', fd 59 | assert fd.second == '12', fd 60 | 61 | fd = FlexiDate(2004, 3, 2, 10, 11, 12, 123456) 62 | assert fd.month == '03', fd 63 | assert fd.day == '02', fd 64 | assert fd.hour == '10', fd 65 | assert fd.minute == '11', fd 66 | assert fd.second == '12', fd 67 | assert fd.microsecond == '123456', fd 68 | 69 | def test_str(self): 70 | fd = FlexiDate(2000, 1, 23) 71 | assert str(fd) == '2000-01-23', '"%s"' % fd 72 | fd = FlexiDate(-2000, 1, 23) 73 | assert str(fd) == '-2000-01-23' 74 | fd = FlexiDate(2000) 75 | assert str(fd) == '2000' 76 | fd = FlexiDate(1760, qualifier='fl.') 77 | assert str(fd) == '1760 [fl.]', fd 78 | 79 | fd = FlexiDate(qualifier='anything') 80 | assert str(fd) == ' [anything]' 81 | 82 | def 
test_repr(self): 83 | fd = FlexiDate(2016, 3, 15) 84 | assert repr(fd) == " 2016-03-15" 85 | 86 | def test_cvt(self): 87 | fd = FlexiDate(2016, 3, 16) 88 | assert fd._cvt(None, 4, True) == '!!!!' 89 | 90 | def test_isoformat(self): 91 | fd = FlexiDate(2000, 1, 24) 92 | assert str(fd.isoformat()) == '2000-01-24' 93 | 94 | def test_from_str(self): 95 | def dotest(fd): 96 | out = FlexiDate.from_str(str(fd)) 97 | assert str(out) == str(fd) 98 | 99 | def dotest2(fd): 100 | out = FlexiDate.from_str("Not a date") 101 | assert str(out) == 'None' 102 | 103 | fd = FlexiDate(2000, 1, 23) 104 | dotest(fd) 105 | dotest2(fd) 106 | fd = FlexiDate(1760, qualifier='fl.') 107 | dotest(fd) 108 | dotest2(fd) 109 | fd = FlexiDate(-1760, 1, 3, qualifier='fl.') 110 | dotest(fd) 111 | dotest2(fd) 112 | 113 | def test_as_float(self): 114 | fd = FlexiDate(2000) 115 | assert fd.as_float() == float(2000), fd.as_float() 116 | fd = FlexiDate(1760, 1, 2) 117 | exp = 1760 + 1/12.0 + 2/365.0 118 | assert fd.as_float() == exp, fd.as_float() 119 | fd = FlexiDate(-1000) 120 | assert fd.as_float() == float(-1000) 121 | fd = FlexiDate() 122 | assert fd.as_float() == None 123 | 124 | def test_as_datetime(self): 125 | fd = FlexiDate(2000) 126 | out = fd.as_datetime() 127 | assert out == datetime.datetime(2000, 1, 1), out 128 | fd = FlexiDate(1760, 1, 2) 129 | out = fd.as_datetime() 130 | assert out == datetime.datetime(1760, 1, 2), out 131 | 132 | 133 | class TestDateParsers(object): 134 | def test_using_datetime(self): 135 | parser = PythonDateParser() 136 | 137 | d1 = datetime.date(2000, 1, 23) 138 | fd = parser.parse(d1) 139 | assert fd.year == '2000' 140 | 141 | d1 = datetime.datetime(2000, 1, 23) 142 | fd = parser.parse(d1) 143 | # assert str(fd) == '2000-01-23T00:00:00', fd 144 | assert str(fd) == '2000-01-23 00:00:00.00', fd 145 | 146 | def test_using_dateutil(self): 147 | parser = DateutilDateParser() 148 | 149 | in1 = '2016-06-03 10' 150 | fd = parser.parse(in1) 151 | assert str(fd) == in1, fd 
152 | 153 | in1 = '86' 154 | fd = parser.parse(in1) 155 | assert str(fd) == '0086' 156 | 157 | in1 = '2001-02' 158 | fd = parser.parse(in1) 159 | assert str(fd) == in1, fd 160 | 161 | in1 = 'March 1762' 162 | fd = parser.parse(in1) 163 | assert str(fd) == '1762-03' 164 | 165 | in1 = 'March 1762' 166 | fd = parser.parse(in1) 167 | assert str(fd) == '1762-03' 168 | 169 | in1 = '1768 AD' 170 | fd = parser.parse(in1) 171 | assert str(fd) == '1768', fd 172 | 173 | in1 = '1768 A.D.' 174 | fd = parser.parse(in1) 175 | assert str(fd) == '1768', fd 176 | 177 | in1 = '1768 CE' 178 | fd = parser.parse(in1) 179 | assert str(fd) == '1768', fd 180 | 181 | in1 = '1768 C.E.' 182 | fd = parser.parse(in1) 183 | assert str(fd) == '1768', fd 184 | 185 | in1 = '-1850' 186 | fd = parser.parse(in1) 187 | assert str(fd) == '-1850', fd 188 | 189 | in1 = '1762 BC' 190 | fd = parser.parse(in1) 191 | assert str(fd) == '-1762', fd 192 | 193 | in1 = '4 BC' 194 | fd = parser.parse(in1) 195 | assert str(fd) == '-0004', fd 196 | 197 | in1 = '4 B.C.' 198 | fd = parser.parse(in1) 199 | assert str(fd) == '-0004', fd 200 | 201 | in1 = '4 BCE' 202 | fd = parser.parse(in1) 203 | assert str(fd) == '-0004', fd 204 | 205 | in1 = '4 B.C.E.' 
206 | fd = parser.parse(in1) 207 | assert str(fd) == '-0004', fd 208 | 209 | in1 = 'Wed, 06 Jan 2010 09:30:00 GMT' 210 | fd = parser.parse(in1) 211 | assert str(fd) == '2010-01-06 09:30:00.00', fd 212 | 213 | in1 = 'Tue, 07 Dec 2010 10:00:00 GMT' 214 | fd = parser.parse(in1) 215 | assert str(fd) == '2010-12-07 10:00:00.00', fd 216 | 217 | in1 = '2015.03.01' 218 | fd = parser.parse(in1) 219 | assert str(fd) == '2015-03-01', fd 220 | 221 | def test_parse(self): 222 | d1 = datetime.date(2000, 1, 23) 223 | fd = parse(d1) 224 | assert fd.year == '2000' 225 | 226 | d1 = datetime.datetime(2002, 1, 23) 227 | fd = parse(d1) 228 | assert fd.year == '2002' 229 | 230 | fd = parse('March 1762') 231 | assert str(fd) == '1762-03' 232 | 233 | fd = parse(1966) 234 | assert str(fd) == '1966' 235 | 236 | fd = parse('22/07/2010') 237 | assert fd.month == '07', fd.month 238 | 239 | d2 = FlexiDate(1760, 1, 2) 240 | fd = parse(d2) 241 | assert fd.year == '1760' 242 | 243 | def test_parse_ambiguous_day_month(self): 244 | fd = parse('05/07/2010') 245 | assert fd.month == '07', fd.month 246 | assert fd.day == '05', fd.month 247 | 248 | def test_parse_with_none(self): 249 | d1 = parse(None) 250 | assert d1 is None 251 | 252 | def test_parse_wildcards(self): 253 | fd = parse('198?') 254 | assert fd.year == '', fd.year # expect this to not parse 255 | # TODO but we should have a float if possible 256 | # assert fd.as_float() == u'1980', fd.as_float() 257 | 258 | def test_parse_with_qualifiers(self): 259 | fd = parse('1985?') 260 | assert fd.year == u'1985', fd 261 | assert fd.qualifier == u'Uncertainty : 1985?', fd.qualifier 262 | 263 | fd = parse('c.1780') 264 | assert fd.year == u'1780', fd 265 | assert fd.qualifier == u"Note 'circa' : c.1780", fd 266 | 267 | fd = parse('c. 1780') 268 | assert fd.year == u'1780', fd 269 | assert fd.qualifier.startswith(u"Note 'circa'"), fd 270 | 271 | def test_ambiguous(self): 272 | # TODO: have to be careful here ... 
273 | fd = parse('1068/1069') 274 | 275 | def test_small_years(self): 276 | in1 = '23' 277 | fd = parse(in1) 278 | assert str(fd) == '0023', fd 279 | assert fd.as_float() == 23, fd.as_float() 280 | 281 | def test_small_years_with_zeros(self): 282 | in1 = '0023' 283 | fd = parse(in1) 284 | assert str(fd) == '0023', fd 285 | assert fd.as_float() == 23, fd.as_float() 286 | 287 | def test_years_with_alpha_prefix(self): 288 | in1 = "p1980" 289 | fd = parse(in1) 290 | assert str(fd) == "1980", fd 291 | 292 | 293 | if __name__ == '__main__': 294 | unittest.main() 295 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.2.0", "setuptools_scm[toml]>=3.4.3"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "flexidate" 7 | authors = [{name = "Open Knowledge Foundation", email = "info@okfn.org"}] 8 | license = {text = "MIT"} 9 | description = "Very flexible date parsing and normalization utilities" 10 | readme = "README.md" 11 | classifiers = [ 12 | "Development Status :: 5 - Production/Stable", 13 | "Environment :: Console", 14 | "Intended Audience :: Developers", 15 | "Operating System :: OS Independent", 16 | "Programming Language :: Python :: 2.7", 17 | "Programming Language :: Python :: 3.3", 18 | "Programming Language :: Python :: 3.4", 19 | "Programming Language :: Python :: 3.5", 20 | "Programming Language :: Python :: 3.6", 21 | "Programming Language :: Python :: 3.7", 22 | "Programming Language :: Python :: 3.8", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Topic :: Software Development :: Libraries :: Python Modules", 27 | ] 28 | urls = {Homepage = "https://github.com/datopian/flexidate"} 29 | dependencies = ["python-dateutil>=2.5"] 30 | dynamic = ["version"] 31 | 32 
| [tool.setuptools] 33 | packages = ["flexidate"] 34 | zip-safe = false 35 | 36 | [tool.setuptools_scm] 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2022.12.7 2 | chardet==3.0.4 3 | codecov==2.0.16 4 | coverage==4.5.2 5 | idna==2.8 6 | nose==1.3.7 7 | python-dateutil==2.8.0 8 | requests==2.31.0 9 | six==1.12.0 10 | urllib3==1.26.17 11 | --------------------------------------------------------------------------------