├── tests ├── en │ ├── __init__.py │ ├── test_format_time.py │ ├── test_format_iso.py │ ├── test_random_0.py │ ├── test_format_middle_endian.py │ ├── test_format_slash.py │ ├── test_format_time_and_date.py │ └── test_format_little_endian.py ├── jp │ ├── __init__.py │ └── test_format_standard.py ├── __init__.py └── test_basic_operation.py ├── chrono ├── parsers │ ├── __init__.py │ ├── th │ │ └── __init__.py │ ├── jp │ │ ├── __init__.py │ │ ├── util.py │ │ └── standard_parser.py │ ├── en │ │ ├── __init__.py │ │ ├── iso_parser.py │ │ ├── slash_format.py │ │ ├── util.py │ │ ├── month_name_little_endian.py │ │ ├── month_name_middle_endian.py │ │ └── time_expression.py │ └── parser.py ├── refiners │ ├── __init__.py │ ├── en │ │ ├── __init__.py │ │ ├── remove_overlap.py │ │ ├── merge_date_range.py │ │ └── merge_date_time.py │ ├── refiner.py │ └── filter.py ├── __init__.py ├── options.py ├── chrono.py └── parsed_result.py ├── setup.cfg ├── MANIFEST.in ├── .gitignore ├── setup.py ├── test.py ├── README.md ├── watch.py └── LICENSE.txt /tests/en/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/jp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /chrono/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /chrono/refiners/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import en 
-------------------------------------------------------------------------------- /chrono/parsers/th/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE.txt 2 | recursive-include chrono/parsers * -------------------------------------------------------------------------------- /chrono/parsers/jp/__init__.py: -------------------------------------------------------------------------------- 1 | from .standard_parser import JPStandartDateFormatParser -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.py[co] 3 | 4 | htmlcov/ 5 | reports/ 6 | *.egg 7 | *.egg-info 8 | .pypirc 9 | env 10 | .vscode -------------------------------------------------------------------------------- /chrono/refiners/en/__init__.py: -------------------------------------------------------------------------------- 1 | from .remove_overlap import ENRemoveOverlapRefiner 2 | from .merge_date_time import ENMergeDateTimeRefiner 3 | from .merge_date_range import ENMergeDateRangeRefiner 4 | -------------------------------------------------------------------------------- /tests/en/test_format_time.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import chrono 4 | 5 | from datetime import datetime 6 | 7 | 8 | class TimeExpessionTest(unittest.TestCase): 9 | def setUp(self): 10 | pass 11 | 12 | def test_time_parsing(self): 13 | pass 14 | 
-------------------------------------------------------------------------------- /chrono/refiners/refiner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | from ..parsed_result import ParsedResult 5 | from ..parsed_result import ParsedComponent 6 | 7 | 8 | class Refiner(object): 9 | def refine(self, results, text, options): 10 | return results 11 | -------------------------------------------------------------------------------- /chrono/parsers/en/__init__.py: -------------------------------------------------------------------------------- 1 | from .iso_parser import ENInternationalStandardParser 2 | from .month_name_little_endian import ENMonthNameLittleEndianParser 3 | from .month_name_middle_endian import ENMonthNameMiddleEndianParser 4 | from .slash_format import ENSlashDateFormatParser 5 | from .time_expression import ENTimeExpressionParser -------------------------------------------------------------------------------- /chrono/__init__.py: -------------------------------------------------------------------------------- 1 | from .chrono import Chrono 2 | from .chrono import parse 3 | from .chrono import parse_date 4 | 5 | from .parsed_result import ParsedResult 6 | from .parsed_result import ParsedComponent 7 | from .parsers.parser import Parser 8 | from .refiners.refiner import Refiner 9 | from .refiners.filter import Filter 10 | 11 | from . import parsers 12 | from . 
import refiners -------------------------------------------------------------------------------- /chrono/refiners/filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | from ..parsed_result import ParsedResult 5 | from ..parsed_result import ParsedComponent 6 | from .refiner import Refiner 7 | 8 | 9 | class Filter(Refiner): 10 | def verify(self, result): 11 | return True 12 | 13 | def refine(self, results, text, options): 14 | return [r for r in results if self.verify(r)] 15 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup( 4 | name='chrono', 5 | description='A natural language date parser', 6 | version='0.0.1', 7 | author='Wanasit Tanakitrungruang', 8 | license='LICENSE.txt', 9 | packages=['chrono', 'chrono.parsers', 'chrono.refiners'], 10 | package_data={'': ['parsers/*/*.py', 'refiners/*/*.py']}, 11 | include_package_data=True, 12 | url='https://github.com/wanasit/chrono-python', 13 | download_url='https://github.com/wanasit/chrono-python/tarball/0.0.1', 14 | keywords=['parser', 'time', 'date', 'natural'], 15 | install_requires=[]) 16 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import optparse 3 | import unittest 4 | 5 | USAGE = """%prog [TEST_PATH] [SDK_PATH] 6 | Run unit tests for App Engine apps. 
7 | 8 | TEST_PATH Path to package containing test modules 9 | """ 10 | 11 | 12 | def main(test_path): 13 | suite = unittest.loader.TestLoader().discover(test_path) 14 | unittest.TextTestRunner(verbosity=2).run(suite) 15 | 16 | 17 | if __name__ == '__main__': 18 | parser = optparse.OptionParser(USAGE) 19 | options, args = parser.parse_args() 20 | 21 | TEST_PATH = './tests' 22 | 23 | if len(args) >= 1: 24 | TEST_PATH = args[0] 25 | 26 | main(TEST_PATH) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Chrono 2 | ====== 3 | 4 | A natural language date parser. 5 | 6 | Chrono extracts date expressions using low-level pattern matching. Thus, it's fast and doesn't have any dependencies. Currently, the supported formats include: 7 | 8 | * 2014-12-13 12:00:00 9 | * 10/13/2013 10 | * Sat Aug 17 2013 18:40:39 11 | * Saturday, 17 August 2013 - Monday, 19 August 2013 12 | 13 | ### Installation 14 | 15 | The current recommended way is installing directly from Github. 16 | 17 | pip install git+git://github.com/wanasit/chrono-python.git 18 | 19 | 20 | ## USAGE 21 | 22 | Just pass a string to the function `parse` or `parse_date`. 
23 | 24 | ```python 25 | import chrono 26 | 27 | chrono.parse('12 June') 28 | # return an array of [chrono.ParsedResult] 29 | # [] 30 | 31 | chrono.parse_date('12 June') 32 | # return a Python's standard datetime.datetime 33 | # datetime.datetime(2014, 6, 12, 12, 0) 34 | ``` 35 | -------------------------------------------------------------------------------- /chrono/refiners/en/remove_overlap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | from ..refiner import Refiner 5 | from ..refiner import ParsedResult 6 | from ..refiner import ParsedComponent 7 | 8 | 9 | class ENRemoveOverlapRefiner(Refiner): 10 | def refine(self, results, text, options): 11 | 12 | if len(results) < 2: return results 13 | 14 | filtered_results = [] 15 | prev_result = results[0] 16 | 17 | for result in results[1:]: 18 | 19 | # If overlap, compare the length and discard the shorter one 20 | if result.index < prev_result.index + len(prev_result.text): 21 | if len(result.text) > len(prev_result.text): 22 | prev_result = result 23 | else: 24 | filtered_results.append(prev_result) 25 | prev_result = result 26 | 27 | # The last one 28 | if prev_result: 29 | filtered_results.append(prev_result) 30 | 31 | return filtered_results 32 | -------------------------------------------------------------------------------- /chrono/parsers/en/iso_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | import re 5 | from .util import date_exist 6 | 7 | from ..parser import Parser 8 | from ..parser import ParsedResult 9 | from ..parser import ParsedComponent 10 | 11 | from datetime import datetime 12 | 13 | 14 | class ENInternationalStandardParser(Parser): 15 | def pattern(self): 16 | return '(^|\W)([0-9]{4})\-([0-9]{1,2})\-([0-9]{1,2})(\W|T|$)' 17 | 18 | def extract(self, text, ref_date, match, options): 19 | 20 | text = 
match.group(0) 21 | text = text[len(match.groups()[0]):len(text) - len(match.groups()[-1])] 22 | 23 | year = int(match.group(2)) 24 | month = int(match.group(3)) 25 | day = int(match.group(4)) 26 | 27 | if not date_exist(year, month, day): return None 28 | 29 | result = ParsedResult() 30 | result.index = match.start() + len(match.groups()[0]) 31 | result.text = text 32 | result.start = ParsedComponent(year=year, month=month, day=day) 33 | 34 | return result 35 | -------------------------------------------------------------------------------- /watch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import sys 4 | import time 5 | import unittest 6 | import subprocess 7 | from watchdog.observers import Observer 8 | from watchdog.events import PatternMatchingEventHandler 9 | 10 | current_dir = os.path.dirname(os.path.realpath(__file__)) 11 | 12 | 13 | class SrcFileEventHandler(PatternMatchingEventHandler): 14 | 15 | patterns = ["*.py"] 16 | 17 | def run_test(self): 18 | print subprocess.check_output(["python", current_dir + "/test.py"] + 19 | sys.argv[1:]) 20 | 21 | def on_modified(self, event): 22 | self.run_test() 23 | 24 | def on_created(self, event): 25 | self.run_test() 26 | 27 | 28 | if __name__ == '__main__': 29 | 30 | handler = SrcFileEventHandler() 31 | 32 | observer = Observer() 33 | observer.schedule(handler, current_dir, recursive=True) 34 | observer.start() 35 | 36 | try: 37 | while True: 38 | time.sleep(1) 39 | except KeyboardInterrupt: 40 | observer.stop() 41 | observer.join() 42 | -------------------------------------------------------------------------------- /chrono/parsers/parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | import re 5 | from datetime import datetime 6 | 7 | from ..parsed_result import ParsedResult 8 | from ..parsed_result import ParsedComponent 9 | 10 | 11 | 
class Parser(object): 12 | def pattern(self): 13 | return '$' 14 | 15 | def extract(self, text, ref_date, match, options): 16 | return None 17 | 18 | def execute(self, text, ref_date, options): 19 | 20 | results = [] 21 | pattern = re.compile(self.pattern(), re.IGNORECASE | re.UNICODE) 22 | 23 | offset = 0 24 | 25 | while offset < len(text): 26 | 27 | result = None 28 | match = pattern.search(text, offset) 29 | 30 | if match is None: 31 | return results 32 | 33 | result = self.extract(text, ref_date, match, options) 34 | 35 | if result: 36 | results.append(result) 37 | offset += result.index + len(result.text) 38 | else: 39 | offset += 1 40 | 41 | return results 42 | -------------------------------------------------------------------------------- /tests/en/test_format_iso.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import chrono 4 | 5 | from datetime import datetime 6 | 7 | 8 | class ISOFormatTest(unittest.TestCase): 9 | def setUp(self): 10 | pass 11 | 12 | def test_iso_format(self): 13 | 14 | results = chrono.parse('Test : 2013-3-22', datetime(2013, 3, 22)) 15 | self.assertEqual(len(results), 1) 16 | 17 | result = results[0] 18 | self.assertEqual(result.index, 7) 19 | self.assertEqual(result.text, '2013-3-22') 20 | self.assertEqual(result.start.get('day'), 22) 21 | self.assertEqual(result.start.get('month'), 3) 22 | self.assertEqual(result.start.get('year'), 2013) 23 | self.assertEqual(result.start.date(), datetime(2013, 3, 22, 12)) 24 | 25 | def test_iso_format_with_imposible_date(self): 26 | 27 | results = chrono.parse("2013-8-32") 28 | self.assertEquals(len(results), 0) 29 | 30 | results = chrono.parse("2014-8-32") 31 | self.assertEquals(len(results), 0) 32 | 33 | results = chrono.parse("2014-2-29") 34 | self.assertEquals(len(results), 0) 35 | -------------------------------------------------------------------------------- /LICENSE.txt: 
-------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2013, Wanasit Tanakitrungruang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/en/test_random_0.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import chrono 4 | 5 | from datetime import datetime 6 | 7 | 8 | class RandomTest1(unittest.TestCase): 9 | def setUp(self): 10 | pass 11 | 12 | def test_en_random_parsing_1(self): 13 | 14 | results = chrono.parse( 15 | """A Wiki is a website which is editable over the web by it's users. 16 | This allows information to be more rapidly updated than traditional websites. 
17 | Many Apache projects make active use of wikis for community support and for extra project information, 18 | in addition to their main project websites. 19 | This General Wiki is a top-level overview of other wikis at the Apache Software Foundation, 20 | as well as overall Foundation-level information, at the bottom of this page.""" 21 | ) 22 | 23 | self.assertEqual(len(results), 0) 24 | 25 | def test_en_random_parsing_1(self): 26 | 27 | results = chrono.parse( 28 | """An event on February 24, 2013, and another event on March 1, 2013""" 29 | ) 30 | 31 | self.assertEqual(len(results), 2) 32 | self.assertEqual(results[0].text, 'February 24, 2013') 33 | self.assertEqual(results[1].text, 'March 1, 2013') 34 | -------------------------------------------------------------------------------- /chrono/options.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | #from parsers.en import ENGeneralDateParser 5 | from .parsers.en import ENInternationalStandardParser 6 | from .parsers.en import ENMonthNameLittleEndianParser 7 | from .parsers.en import ENMonthNameMiddleEndianParser 8 | from .parsers.en import ENSlashDateFormatParser 9 | from .parsers.en import ENTimeExpressionParser 10 | 11 | from .parsers.jp import JPStandartDateFormatParser 12 | 13 | from .refiners.en import ENRemoveOverlapRefiner 14 | from .refiners.en import ENMergeDateTimeRefiner 15 | from .refiners.en import ENMergeDateRangeRefiner 16 | 17 | class Options(): 18 | 19 | def __init__(self): 20 | self.parsers = [] 21 | self.refiners = [] 22 | 23 | 24 | def standard_options(): 25 | 26 | options = Options() 27 | options.parsers.append(ENInternationalStandardParser()) 28 | options.parsers.append(ENMonthNameLittleEndianParser()) 29 | options.parsers.append(ENMonthNameMiddleEndianParser()) 30 | options.parsers.append(ENSlashDateFormatParser()) 31 | options.parsers.append(ENTimeExpressionParser()) 32 | 
options.parsers.append(JPStandartDateFormatParser()) 33 | 34 | options.refiners.append(ENRemoveOverlapRefiner()) 35 | options.refiners.append(ENMergeDateTimeRefiner()) 36 | options.refiners.append(ENMergeDateRangeRefiner()) 37 | 38 | return options 39 | -------------------------------------------------------------------------------- /chrono/chrono.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | from datetime import datetime 5 | 6 | from . import options 7 | 8 | from .parsed_result import ParsedResult 9 | from .parsed_result import ParsedComponent 10 | 11 | 12 | class Chrono: 13 | def __init__(self, options): 14 | self.options = options 15 | self.parsers = options.parsers[:] 16 | self.refiners = options.refiners[:] 17 | 18 | def parse(self, text, ref_date, options): 19 | 20 | if ref_date is None: ref_date = datetime.now() 21 | 22 | results = [] 23 | for parser in self.parsers: 24 | sub_results = parser.execute(text, ref_date, options) 25 | sub_results = self.refine_results(sub_results, text, options) 26 | results += sub_results 27 | 28 | results = sorted(results, key=lambda x: x.index) 29 | results = self.refine_results(results, text, options) 30 | return results 31 | 32 | def refine_results(self, results, text, options): 33 | 34 | for refiner in self.refiners: 35 | results = refiner.refine(results, text, options) 36 | 37 | return results 38 | 39 | 40 | shared_instance = Chrono(options.standard_options()) 41 | 42 | 43 | def parse(text, ref_date=None, options=None): 44 | results = shared_instance.parse(text, ref_date, options) 45 | return results 46 | 47 | 48 | def parse_date(text, ref_date=None, timezone=None): 49 | 50 | results = shared_instance.parse(text, ref_date, options) 51 | 52 | if len(results) == 0: return None 53 | return results[0].start.date() 54 | -------------------------------------------------------------------------------- 
/tests/test_basic_operation.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from datetime import datetime 3 | 4 | import chrono 5 | 6 | from chrono import Parser 7 | from chrono import ParsedResult 8 | from chrono import options 9 | from datetime import datetime 10 | 11 | 12 | class TestBesicOperations(unittest.TestCase): 13 | def setUp(self): 14 | pass 15 | 16 | def test_basic_0_plain_parser(self): 17 | 18 | parser = Parser() 19 | results = parser.execute('Hello World', datetime.now(), {}) 20 | self.assertEqual(results, []) 21 | 22 | def test_basic_1_exmple_parser(self): 23 | 24 | parser = options.ENInternationalStandardParser() 25 | results = parser.execute('Hello World', datetime.now(), {}) 26 | self.assertEqual(results, []) 27 | 28 | results = parser.execute('Test : 2013-2-27', datetime.now(), {}) 29 | self.assertGreater(len(results), 0) 30 | 31 | result = results[0] 32 | self.assertEqual(result.index, 7) 33 | self.assertEqual(result.text, '2013-2-27') 34 | 35 | def test_basic_2_chorono_functions(self): 36 | 37 | results = chrono.parse('Hello World') 38 | self.assertEqual(len(results), 0) 39 | 40 | result = chrono.parse_date('Hello World') 41 | self.assertEqual(result, None) 42 | 43 | results = chrono.parse('Test : 2013-2-27') 44 | self.assertEqual(len(results), 1) 45 | 46 | result = results[0] 47 | self.assertEqual(result.index, 7) 48 | self.assertEqual(result.text, '2013-2-27') 49 | 50 | 51 | if __name__ == '__main__': 52 | unittest.main() 53 | -------------------------------------------------------------------------------- /chrono/parsers/en/slash_format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | import re 5 | from .util import date_exist 6 | 7 | from ..parser import Parser 8 | from ..parser import ParsedResult 9 | from ..parser import ParsedComponent 10 | 11 | from datetime import datetime 12 | 
13 | 14 | class ENSlashDateFormatParser(Parser): 15 | def pattern(self): 16 | return '((?:\W|^)(Sun|Sunday|Mon|Monday|Tue|Tuesday|Wed|Wednesday|Thur|Thursday|Fri|Friday|Sat|Saturday)?\s*\,?\s*)([0-9]{1,2})/([0-9]{1,2})(/([0-9]{4}|[0-9]{2}))?(\W|$)' 17 | 18 | def extract(self, text, ref_date, match, options): 19 | 20 | text = match.group(0) 21 | text = text[len(match.groups()[0]):len(text) - len(match.groups()[-1])] 22 | 23 | year = ref_date.year 24 | month = int(match.group(3)) 25 | day = int(match.group(4)) 26 | 27 | if month < 1 or month > 12: return None 28 | if day < 1 or day > 31: return None 29 | 30 | if match.group(6): 31 | year = int(match.group(6)) 32 | if year < 100: 33 | if year > 50: 34 | year = year + 2500 - 543 #BE 35 | else: 36 | year = year + 2000 37 | 38 | elif year > 2500: 39 | year = year - 543 #BE 40 | 41 | if not date_exist(year, month, day): return None 42 | 43 | result = ParsedResult() 44 | result.index = match.start() + len(match.groups()[0]) 45 | result.text = text 46 | result.start = ParsedComponent(year=year, month=month, day=day) 47 | 48 | return result -------------------------------------------------------------------------------- /tests/jp/test_format_standard.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | import unittest 3 | import chrono 4 | 5 | from datetime import datetime 6 | 7 | 8 | class StandardDateFormatTest(unittest.TestCase): 9 | def setUp(self): 10 | pass 11 | 12 | def test_standard_parsing(self): 13 | 14 | results = chrono.parse("初めて動画が投稿されたのは 4月23日である", datetime(2012, 8, 10)) 15 | self.assertEqual(len(results), 1) 16 | 17 | result = results[0] 18 | self.assertEqual(result.index, len('初めて動画が投稿されたのは ')) 19 | self.assertEqual(result.text, '4月23日') 20 | self.assertEqual(result.start.get('day'), 23) 21 | self.assertEqual(result.start.get('month'), 4) 22 | self.assertEqual(result.start.get('year'), 2012) 23 | self.assertEqual(result.start.date(), 
datetime(2012, 4, 23, 12)) 24 | 25 | results = chrono.parse("主な株主(2012年9月3日現在)", datetime(2012, 8, 10)) 26 | self.assertEqual(len(results), 1) 27 | 28 | result = results[0] 29 | self.assertEqual(result.index, len('主な株主(')) 30 | self.assertEqual(result.text, '2012年9月3日') 31 | self.assertEqual(result.start.get('day'), 3) 32 | self.assertEqual(result.start.get('month'), 9) 33 | self.assertEqual(result.start.get('year'), 2012) 34 | self.assertEqual(result.start.date(), datetime(2012, 9, 3, 12)) 35 | 36 | results = chrono.parse("主な株主(2013年9月13日現在)", datetime(2012, 8, 10)) 37 | self.assertEqual(len(results), 1) 38 | 39 | result = results[0] 40 | self.assertEqual(result.index, len('主な株主(')) 41 | self.assertEqual(result.text, '2013年9月13日') 42 | self.assertEqual(result.start.get('day'), 13) 43 | self.assertEqual(result.start.get('month'), 9) 44 | self.assertEqual(result.start.get('year'), 2013) 45 | self.assertEqual(result.start.date(), datetime(2013, 9, 13, 12)) 46 | -------------------------------------------------------------------------------- /chrono/parsers/en/util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | import re 5 | import datetime 6 | 7 | MONTH_NAMES = { 8 | "january": 1, 9 | "jan": 1, 10 | "february": 2, 11 | "feb": 2, 12 | "march": 3, 13 | "mar": 3, 14 | "april": 4, 15 | "apr": 4, 16 | "may": 5, 17 | "june": 6, 18 | "jun": 6, 19 | "july": 7, 20 | "jul": 7, 21 | "august": 8, 22 | "aug": 8, 23 | "september": 9, 24 | "sep": 9, 25 | "october": 10, 26 | "oct": 10, 27 | "november": 11, 28 | "nov": 11, 29 | "december": 12, 30 | "dec": 12 31 | } 32 | 33 | 34 | def month_index(month_name): 35 | return MONTH_NAMES[month_name.lower()] 36 | 37 | 38 | def date_exist(year, month, day): 39 | try: 40 | return datetime.date(year, month, day).day == day 41 | except ValueError: 42 | return False 43 | 44 | 45 | def find_closest_year(ref_date, month, day): 46 | 47 | year = None 
48 | 49 | if date_exist(ref_date.year, month, day): 50 | year = ref_date.year 51 | 52 | if date_exist(ref_date.year - 1, month, day): 53 | if year: 54 | tdelta1 = datetime.datetime(year, month, day) - ref_date 55 | tdelta2 = datetime.datetime(ref_date.year - 1, month, 56 | day) - ref_date 57 | if abs(tdelta2) < abs(tdelta1): 58 | year = ref_date.year - 1 59 | else: 60 | year = ref_date.year - 1 61 | 62 | if date_exist(ref_date.year + 1, month, day): 63 | if year: 64 | tdelta1 = datetime.datetime(year, month, day) - ref_date 65 | tdelta2 = datetime.datetime(ref_date.year + 1, month, 66 | day) - ref_date 67 | if abs(tdelta2) < abs(tdelta1): 68 | year = ref_date.year + 1 69 | else: 70 | year = ref_date.year + 1 71 | 72 | return year 73 | -------------------------------------------------------------------------------- /chrono/parsers/jp/util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | import re 5 | import unicodedata 6 | import datetime 7 | 8 | MONTH_NAMES = { 9 | "january": 1, 10 | "jan": 1, 11 | "february": 2, 12 | "feb": 2, 13 | "march": 3, 14 | "mar": 3, 15 | "april": 4, 16 | "apr": 4, 17 | "may": 5, 18 | "june": 6, 19 | "jun": 6, 20 | "july": 7, 21 | "jul": 7, 22 | "august": 8, 23 | "aug": 8, 24 | "september": 9, 25 | "sep": 9, 26 | "october": 10, 27 | "oct": 10, 28 | "november": 11, 29 | "nov": 11, 30 | "december": 12, 31 | "dec": 12 32 | } 33 | 34 | 35 | def normalize(text): 36 | return unicodedata.normalize('NFKC', text) 37 | 38 | 39 | def month_index(month_name): 40 | return MONTH_NAMES[month_name.lower()] 41 | 42 | 43 | def date_exist(year, month, day): 44 | try: 45 | return datetime.date(year, month, day).day == day 46 | except ValueError: 47 | return False 48 | 49 | 50 | def find_closest_year(ref_date, month, day): 51 | 52 | year = None 53 | 54 | if date_exist(ref_date.year, month, day): 55 | year = ref_date.year 56 | 57 | if date_exist(ref_date.year - 1, 
month, day): 58 | if year: 59 | tdelta1 = datetime.datetime(year, month, day) - ref_date 60 | tdelta2 = datetime.datetime(ref_date.year - 1, month, 61 | day) - ref_date 62 | if abs(tdelta2) < abs(tdelta1): 63 | year = ref_date.year - 1 64 | else: 65 | year = ref_date.year - 1 66 | 67 | if date_exist(ref_date.year + 1, month, day): 68 | if year: 69 | tdelta1 = datetime.datetime(year, month, day) - ref_date 70 | tdelta2 = datetime.datetime(ref_date.year + 1, month, 71 | day) - ref_date 72 | if abs(tdelta2) < abs(tdelta1): 73 | year = ref_date.year + 1 74 | else: 75 | year = ref_date.year + 1 76 | 77 | return year 78 | -------------------------------------------------------------------------------- /chrono/parsers/en/month_name_little_endian.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | import re 5 | 6 | from ..parser import Parser 7 | from ..parser import ParsedResult 8 | from ..parser import ParsedComponent 9 | 10 | from datetime import datetime 11 | from .util import month_index 12 | from .util import date_exist 13 | from .util import find_closest_year 14 | 15 | 16 | class ENMonthNameLittleEndianParser(Parser): 17 | def pattern(self): 18 | return '(\W|^)((Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun|Mon|Tue|Wed|Thu|Fri|Sat)\s*,?\s*)?([0-9]{1,2})(st|nd|rd|th)?(\s*(to|\-|\s)\s*([0-9]{1,2})(st|nd|rd|th)?)?\s*(January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|Jul|August|Aug|September|Sep|October|Oct|November|Nov|December|Dec)((\s*[0-9]{2,4})(\s*BE)?)?(\W|$)' 19 | 20 | def extract(self, text, ref_date, match, options): 21 | 22 | text = match.group(0) 23 | text = text[len(match.groups()[0]):len(text) - len(match.groups()[-1])] 24 | 25 | month = month_index(match.group(10)) 26 | day = int(match.group(4)) 27 | 28 | year = None 29 | if match.group(11): 30 | year = int(match.group(12)) 31 | 32 | if year < 100: 33 | year = year + 2000 34 | elif 
match.group(13): 35 | year = year - 543 36 | 37 | result = ParsedResult() 38 | result.index = match.start() + len(match.groups()[0]) 39 | result.text = text 40 | result.start = ParsedComponent(month=month, day=day) 41 | 42 | if year: 43 | if not date_exist(year, month, day): return None 44 | result.start.assign('year', year) 45 | else: 46 | year = find_closest_year(ref_date=ref_date, month=month, day=day) 47 | if year is None: return None 48 | 49 | result.start.imply('year', year) 50 | 51 | if match.group(8): 52 | endDay = int(match.group(8)) 53 | result.end = ParsedComponent(year=year, month=month, day=endDay) 54 | 55 | return result 56 | -------------------------------------------------------------------------------- /chrono/parsers/jp/standard_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf8 -*- 3 | 4 | import re 5 | import unicodedata 6 | from ..parser import Parser 7 | from ..parser import ParsedResult 8 | from ..parser import ParsedComponent 9 | 10 | from datetime import datetime 11 | from .util import date_exist 12 | from .util import find_closest_year 13 | from .util import normalize 14 | 15 | 16 | class JPStandartDateFormatParser(Parser): 17 | def pattern(self): 18 | return '(((平成|昭和)?([0-9]{2,4}|[0-9]{6,12})年|今年|去年|来年)|[^年]|^)([0-9]{1,2}|[0-9]{3,6}|今|先|来)月([0-9]{1,2}|[0-9]{3,6})日\s*(?:\((?:日|月|火|水|木|金|土)\))?' 
#!/usr/bin/env python
# -*- coding: utf8 -*-
import re

from ..refiner import Refiner
from ..refiner import ParsedResult
from ..refiner import ParsedComponent

# Text allowed between two results for them to be read as a single range:
# only whitespace, optionally around one connector word or dash.
_RANGE_CONNECTOR = re.compile(r"^\s*(and|to|-|ー)?\s*$", re.IGNORECASE)


class ENMergeDateRangeRefiner(Refiner):
    """Merge adjacent single-date results ("A to B") into one ranged result."""

    def refine(self, results, text, options):
        """Return ``results`` with neighbouring point results joined into ranges.

        Two neighbours are merged when neither already has an ``end`` and the
        text between them is only a range connector (see
        ``is_able_to_merge``).  A merged pair is consumed as a whole, so a
        result takes part in at most one merge.

        ``results`` is returned unchanged (same object) when it holds fewer
        than two items; otherwise a new list is returned.  ``options`` is not
        used here.
        """
        if len(results) < 2:
            return results

        merged_results = []
        count = len(results)
        i = 0

        while i < count:
            current = results[i]
            following = results[i + 1] if i + 1 < count else None

            if (following is not None
                    and current.end is None
                    and following.end is None
                    and is_able_to_merge(text, current, following)):
                merged_results.append(merge_result(text, current, following))
                i += 2
                continue

            # Not mergeable (or last item): keep it as-is.  Walking by index
            # like this also keeps a lone result that follows a merged pair,
            # which the previous pairwise loop silently dropped.
            merged_results.append(current)
            i += 1

        return merged_results


def merge_result(text, from_result, to_result):
    """Build one ranged result spanning ``from_result`` and ``to_result``.

    Components that are only implied on one side are re-implied from the
    other side when that side knows them for certain (e.g. a year given only
    on the second date).  The earlier date becomes ``start`` and the later
    one ``end``.  The returned result's ``index``/``text`` cover both inputs
    and everything between them in ``text``.
    """
    to_component = to_result.start.copy()
    from_component = from_result.start.copy()

    # Iterate over a snapshot of the keys: imply() writes to the same dict.
    for unknown in list(from_component.implied_values):
        if to_result.start.is_certain(unknown):
            from_component.imply(unknown, to_result.start.get(unknown))

    for unknown in list(to_component.implied_values):
        if from_result.start.is_certain(unknown):
            to_component.imply(unknown, from_result.start.get(unknown))

    # Keep the range chronological regardless of the order in the text.
    if from_component.date() > to_component.date():
        from_component, to_component = to_component, from_component

    result = from_result.copy()
    result.start = from_component
    result.end = to_component

    begin_index = min(from_result.index, to_result.index)
    end_index = max(from_result.index + len(from_result.text),
                    to_result.index + len(to_result.text))
    result.index = begin_index
    result.text = text[begin_index:end_index]

    return result


def is_able_to_merge(text, result1, result2):
    """Return a match object if only a range connector separates the results.

    The text between the end of ``result1`` and the start of ``result2`` must
    consist of optional whitespace around at most one of ``and``, ``to``,
    ``-`` or ``ー`` (case-insensitive).  Returns ``None`` otherwise.
    """
    text_between = text[result1.index + len(result1.text):result2.index]
    return _RANGE_CONNECTOR.match(text_between)
ParsedComponent 9 | 10 | from datetime import datetime 11 | from .util import month_index 12 | from .util import date_exist 13 | from .util import find_closest_year 14 | 15 | FULL_PATTERN = "(\W|^)((Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun|Mon|Tue|Wed|Thu|Fri|Sat)\s*,?\s*)?(Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)\s*(([0-9]{1,2})(st|nd|rd|th)?\s*(to|\-)\s*)?([0-9]{1,2})(st|nd|rd|th)?(,)?(\s*[0-9]{4})(\s*BE)?(\W|$)" 16 | SHORT_PATTERN = "(\W|^)((Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun|Mon|Tue|Wed|Thu|Fri|Sat)\s*,?\s*)?(Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)\s*(([0-9]{1,2})(st|nd|rd|th)?\s*(to|\-)\s*)?([0-9]{1,2})(st|nd|rd|th)?([^0-9]|$)" 17 | 18 | 19 | class ENMonthNameMiddleEndianParser(Parser): 20 | def pattern(self): 21 | return SHORT_PATTERN 22 | 23 | def extract(self, text, ref_date, match, options): 24 | 25 | index = match.start() 26 | month = month_index(match.group(4)) 27 | day = int(match.group(9)) 28 | year = None 29 | 30 | pattern = re.compile(FULL_PATTERN, re.IGNORECASE) 31 | if pattern.match(text[match.start():]): 32 | match = pattern.match(text[match.start():]) 33 | year = int(match.group(12)) 34 | 35 | if match.group(13): 36 | year -= 543 37 | 38 | text = match.group(0) 39 | text = text[len(match.groups()[0]):len(text) - len(match.groups()[-1])] 40 | 41 | result = ParsedResult() 42 | result.index = index + len(match.groups()[0]) 43 | result.text = text 44 | result.start = ParsedComponent(month=month, day=day) 45 | 46 | if year: 47 | if not date_exist(year, month, day): return None 48 | result.start.assign('year', year) 49 | else: 50 | year = find_closest_year(ref_date=ref_date, month=month, day=day) 51 | if year is None: return None 52 | 53 | result.start.imply('year', year) 54 | 55 | if match.group(5): 56 | 57 | start_day = 
#!/usr/bin/env python
# -*- coding: utf8 -*-

from datetime import datetime


class ParsedComponent():
    """Date/time components split into *known* and *implied* values.

    ``known_values`` holds components set with ``assign``; ``implied_values``
    holds components set with ``imply``.  ``get`` prefers a known value over
    an implied one.
    """

    def __init__(self,
                 year=None,
                 month=None,
                 day=None,
                 hour=None,
                 minute=None,
                 second=None):
        self.known_values = {}
        self.implied_values = {}

        if year: self.assign('year', year)
        if month: self.assign('month', month)
        if day: self.assign('day', day)

        # NOTE(review): the hour/minute/second arguments are accepted but not
        # used; the time of day is always implied as 12:00:00 here.
        self.imply('hour', 12)
        self.imply('minute', 0)
        self.imply('second', 0)

    def date(self):
        """Return a naive ``datetime`` built from the current components.

        Microseconds are always 0.  Raises ``TypeError`` when a required
        component is missing (``get`` returns ``None``) and ``ValueError``
        when the components do not form a valid date.
        """
        return datetime(
            self.get('year'),
            self.get('month'),
            self.get('day'),
            self.get('hour'),
            self.get('minute'),
            self.get('second'),
            0)

    def is_certain(self, component):
        """Return True if ``component`` was set with ``assign``."""
        return component in self.known_values

    def assign(self, component, value):
        """Record ``value`` as known, discarding any implied value for it."""
        if component in self.implied_values:
            del self.implied_values[component]
        self.known_values[component] = value

    def imply(self, component, value):
        """Record ``value`` as implied; an existing known value is kept."""
        self.implied_values[component] = value

    def get(self, component):
        """Return the known value, else the implied value, else ``None``."""
        if component in self.known_values:
            return self.known_values[component]
        if component in self.implied_values:
            return self.implied_values[component]
        return None

    def copy(self):
        """Return an independent copy (both value dicts are shallow-copied)."""
        other = ParsedComponent()
        other.known_values = self.known_values.copy()
        other.implied_values = self.implied_values.copy()
        return other


class ParsedResult():
    """One match in the input text with its start and optional end."""

    def __init__(self):
        self.index = 0      # offset of ``text`` within the parsed input
        self.text = None    # the matched substring
        self.start = None   # ParsedComponent for the (start) date
        self.end = None     # ParsedComponent for the range end, if any

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        # The previous templates were empty strings, so the result always
        # printed as ''.  Show the text and the resolved date(s) instead.
        start = self.start.date() if self.start is not None else None

        if self.end is None:
            return '<ParsedResult "{}" {}>'.format(self.text, start)

        return '<ParsedResult "{}" {} - {}>'.format(
            self.text, start, self.end.date())

    def copy(self):
        """Return a copy whose ``start`` and ``end`` are copied components."""
        other = ParsedResult()
        other.index = self.index
        other.text = self.text

        if self.start:
            other.start = self.start.copy()

        # Previously ``start`` was copied twice and ``end`` was lost.
        if self.end:
            other.end = self.end.copy()

        return other
#!/usr/bin/env python
# -*- coding: utf8 -*-
import re

from ..refiner import Refiner
from ..refiner import ParsedResult
from ..refiner import ParsedComponent

# NOTE(review): this pattern is not anchored with ^...$ and every part of it
# is optional, so ``match`` succeeds on any text (it can match the empty
# prefix).  Anchoring it would stop merges across text such as " (" that the
# test-suite currently relies on, so the behaviour is deliberately kept.
_DATE_TIME_CONNECTOR = re.compile(r"\s*(T|at|on|of|,)?\s*", re.IGNORECASE)


class ENMergeDateTimeRefiner(Refiner):
    """Merge a date-only result with a neighbouring time-only result."""

    def refine(self, results, text, options):
        """Return ``results`` with adjacent date/time pairs combined.

        A pair is merged when ``is_able_to_merge`` accepts the text between
        the two results and one of them is date-only while the other is
        time-only, in either order.  A merged pair is consumed as a whole.

        ``results`` is returned unchanged (same object) when it holds fewer
        than two items; otherwise a new list is returned.  ``options`` is not
        used here.
        """
        if len(results) < 2:
            return results

        merged_results = []
        count = len(results)
        i = 0

        while i < count:
            current = results[i]
            merged = None

            if i + 1 < count:
                following = results[i + 1]
                if is_able_to_merge(text, current, following):
                    if is_time_only(following) and is_date_only(current):
                        merged = merge_result(text, current, following)
                    elif is_time_only(current) and is_date_only(following):
                        merged = merge_result(text, following, current)

            if merged is not None:
                merged_results.append(merged)
                i += 2
            else:
                # Walking by index keeps a lone result that follows a merged
                # pair; the previous pairwise loop silently dropped it.
                merged_results.append(current)
                i += 1

        return merged_results


def is_date_only(result):
    """Return True if the result's start has no certain ``hour``."""
    return not result.start.is_certain('hour')


def is_time_only(result):
    """Return True if the start has neither a certain day nor day of week."""
    start = result.start
    return not start.is_certain('day') and not start.is_certain('day_of_week')


def is_able_to_merge(text, result1, result2):
    """Match the text between ``result1`` and ``result2`` against the connector.

    Returns the ``re`` match object (truthy) or ``None``.  See the note on
    ``_DATE_TIME_CONNECTOR``: as written the pattern accepts any text.
    """
    text_between = text[result1.index + len(result1.text):result2.index]
    return _DATE_TIME_CONNECTOR.match(text_between)


def merge_result(text, date_result, time_result):
    """Combine the date of ``date_result`` with the time of ``time_result``.

    The new result spans both inputs in ``text``.  Hour, minute and second
    are taken from the time result and assigned as certain.  If either input
    is a range, an ``end`` is produced the same way, falling back to the
    respective ``start`` for the side that has no ``end``.
    """
    result = ParsedResult()
    begin_index = min(date_result.index, time_result.index)
    end_index = max(date_result.index + len(date_result.text),
                    time_result.index + len(time_result.text))
    result.index = begin_index
    result.text = text[begin_index:end_index]

    result.start = date_result.start.copy()
    for unit in ('hour', 'minute', 'second'):
        result.start.assign(unit, time_result.start.get(unit))

    if time_result.end or date_result.end:
        time_result_end = time_result.end if time_result.end else time_result.start
        date_result_end = date_result.end if date_result.end else date_result.start

        result.end = date_result_end.copy()
        for unit in ('hour', 'minute', 'second'):
            result.end.assign(unit, time_result_end.get(unit))

    return result
#!/usr/bin/env python
# -*- coding: utf8 -*-

import re

from ..parser import Parser
from ..parser import ParsedResult
from ..parser import ParsedComponent

from datetime import datetime

# Raw strings: the pattern text is unchanged, but the backslashes are no
# longer (invalid) string escapes.
FIRST_REG_PATTERN = r"((at|from|\W|^|T)\s*)([0-9]{1,2}|noon|midnight)((\.|\:|\:)([0-9]{2})((\.|\:|\:)([0-9]{2}))?)?(\s*(AM|PM))?(\W|$)"
SECOND_REG_PATTERN = r"\s*(\-|\~|\〜|to|\?)\s*([0-9]{1,2})((\.|\:|\:)([0-9]{2})((\.|\:|\:)([0-9]{2}))?)?(\s*(AM|PM))?\)?"

# Compiled once instead of on every extract() call.
_SECOND_REG = re.compile(SECOND_REG_PATTERN, re.IGNORECASE)


class ENTimeExpressionParser(Parser):
    """Parse clock times ("9:08 PM", "noon") and time ranges ("9 - 11.30 AM")."""

    def pattern(self):
        """Return the regular expression that locates a (start) time."""
        return FIRST_REG_PATTERN

    def extract(self, text, ref_date, match, options):
        """Build a result from a ``FIRST_REG_PATTERN`` match.

        Year, month and day are implied from ``ref_date``; hour, minute and
        second are assigned from the match.  If the text right after the
        match looks like the end of a range (``SECOND_REG_PATTERN``), the
        result also gets an ``end``, and an am/pm marker given only on the
        end is applied to the start as well.

        Returns ``None`` for out-of-range values (minute/second >= 60,
        hour >= 24, hour > 12 with an am/pm marker) and for a bare number
        that is not followed by a range end.  ``options`` is not used here.
        """
        result = ParsedResult()
        result.start = ParsedComponent()
        result.start.imply('year', ref_date.year)
        result.start.imply('month', ref_date.month)
        result.start.imply('day', ref_date.day)

        hour = 0
        minute = 0
        second = 0
        meridiem = None

        hour_token = match.group(3).lower()
        if hour_token == "noon":
            meridiem = 'pm'
            hour = 12
        elif hour_token == "midnight":
            meridiem = 'am'
            hour = 0
        else:
            hour = int(match.group(3))

        if match.group(6):
            minute = int(match.group(6))
            if minute >= 60:
                return None
        elif hour > 100:
            # Compact "hhmm" form; floor division keeps the hour an int on
            # Python 3 as well.
            minute = hour % 100
            hour = hour // 100

        if match.group(9):
            second = int(match.group(9))
            if second >= 60:
                return None

        if match.group(11):
            if hour > 12:
                return None

            marker = match.group(11).lower()
            if marker == 'am':
                meridiem = 'am'
                if hour == 12:
                    hour = 0
            if marker == 'pm':
                meridiem = 'pm'
                if hour != 12:
                    hour += 12

        if hour >= 24:
            return None
        if hour >= 12:
            meridiem = 'pm'

        # Strip the leading context (group 1) and the trailing delimiter
        # (last group) from the reported text.
        matched = match.group(0)
        groups = match.groups()
        result.text = matched[len(groups[0]):len(matched) - len(groups[-1])]
        result.index = match.start() + len(match.group(1))

        result.start.assign('hour', hour)
        result.start.assign('minute', minute)
        result.start.assign('second', second)

        if meridiem:
            result.start.assign('meridiem', meridiem)

        # Look for the end of a range immediately after the start time.
        match = _SECOND_REG.match(text[result.index + len(result.text):])
        if not match:
            # A lone number is too ambiguous to be reported as a time.
            if re.match(r'^\d+$', result.text):
                return None
            return result

        hour = int(match.group(2))
        minute = 0
        second = 0
        meridiem = None

        if match.group(5):
            minute = int(match.group(5))
            if minute >= 60:
                return None
        elif hour > 100:
            minute = hour % 100
            hour = hour // 100

        if match.group(8):
            # Was ``matcher.group(8)``: an undefined name that raised
            # NameError whenever the range end included seconds.
            second = int(match.group(8))
            if second >= 60:
                return None

        if match.group(10):
            if hour > 12:
                return None

            marker = match.group(10).lower()
            if marker == "am":
                meridiem = 'am'
                if hour == 12:
                    hour = 0
            if marker == "pm":
                meridiem = 'pm'
                if hour != 12:
                    hour += 12

            # "9:08 - 11.05 PM": the marker on the end also applies to a
            # start that did not state its own.
            if not result.start.is_certain('meridiem'):
                if meridiem == 'am':
                    result.start.imply('meridiem', 'am')
                    if result.start.get('hour') == 12:
                        result.start.assign('hour', 0)

                if meridiem == 'pm':
                    result.start.imply('meridiem', 'pm')
                    if result.start.get('hour') != 12:
                        result.start.assign('hour',
                                            result.start.get('hour') + 12)

        if hour >= 24:
            return None
        if hour >= 12:
            meridiem = 'pm'

        result.text = result.text + match.group()
        result.end = result.start.copy()

        result.end.assign('hour', hour)
        result.end.assign('minute', minute)
        result.end.assign('second', second)

        if meridiem:
            result.end.assign('meridiem', meridiem)

        return result
'2013-2-27T21:08:12') 20 | self.assertEqual(result.start.get('day'), 27) 21 | self.assertEqual(result.start.get('month'), 2) 22 | self.assertEqual(result.start.get('year'), 2013) 23 | self.assertEqual(result.start.get('hour'), 21) 24 | self.assertEqual(result.start.get('minute'), 8) 25 | self.assertEqual(result.start.get('second'), 12) 26 | self.assertEqual(result.start.date(), datetime(2013, 2, 27, 21, 8, 12)) 27 | 28 | results = chrono.parse('Test : 21:08:12 on 2013-2-27') 29 | 30 | self.assertEqual(len(results), 1) 31 | result = results[0] 32 | self.assertEqual(result.index, 7) 33 | self.assertEqual(result.text, '21:08:12 on 2013-2-27') 34 | self.assertEqual(result.start.get('day'), 27) 35 | self.assertEqual(result.start.get('month'), 2) 36 | self.assertEqual(result.start.get('year'), 2013) 37 | self.assertEqual(result.start.get('hour'), 21) 38 | self.assertEqual(result.start.get('minute'), 8) 39 | self.assertEqual(result.start.get('second'), 12) 40 | self.assertEqual(result.start.date(), datetime(2013, 2, 27, 21, 8, 12)) 41 | 42 | results = chrono.parse('Test : 2013-2-27 at 21:08:12') 43 | self.assertEqual(len(results), 1) 44 | result = results[0] 45 | self.assertEqual(result.index, 7) 46 | self.assertEqual(result.text, '2013-2-27 at 21:08:12') 47 | self.assertEqual(result.start.get('day'), 27) 48 | self.assertEqual(result.start.get('month'), 2) 49 | self.assertEqual(result.start.get('year'), 2013) 50 | self.assertEqual(result.start.get('hour'), 21) 51 | self.assertEqual(result.start.get('minute'), 8) 52 | self.assertEqual(result.start.get('second'), 12) 53 | self.assertEqual(result.start.date(), datetime(2013, 2, 27, 21, 8, 12)) 54 | 55 | results = chrono.parse('Test : 2013-2-27 on 9:08 PM') 56 | self.assertEqual(len(results), 1) 57 | result = results[0] 58 | self.assertEqual(result.index, 7) 59 | self.assertEqual(result.text, '2013-2-27 on 9:08 PM') 60 | self.assertEqual(result.start.get('day'), 27) 61 | self.assertEqual(result.start.get('month'), 2) 62 | 
self.assertEqual(result.start.get('year'), 2013) 63 | self.assertEqual(result.start.get('hour'), 21) 64 | self.assertEqual(result.start.get('minute'), 8) 65 | self.assertEqual(result.start.date(), datetime(2013, 2, 27, 21, 8)) 66 | 67 | def test_date_time_as_range(self): 68 | 69 | results = chrono.parse('Test : 2013-2-27 from 9:08 - 11.05 PM') 70 | self.assertEqual(len(results), 1) 71 | 72 | result = results[0] 73 | self.assertEqual(result.index, 7) 74 | self.assertEqual(result.text, '2013-2-27 from 9:08 - 11.05 PM') 75 | self.assertEqual(result.start.get('day'), 27) 76 | self.assertEqual(result.start.get('month'), 2) 77 | self.assertEqual(result.start.get('year'), 2013) 78 | self.assertEqual(result.start.get('hour'), 21) 79 | self.assertEqual(result.start.get('minute'), 8) 80 | self.assertEqual(result.start.date(), datetime(2013, 2, 27, 21, 8)) 81 | 82 | self.assertEqual(result.end.get('day'), 27) 83 | self.assertEqual(result.end.get('month'), 2) 84 | self.assertEqual(result.end.get('year'), 2013) 85 | self.assertEqual(result.end.get('hour'), 23) 86 | self.assertEqual(result.end.get('minute'), 5) 87 | self.assertEqual(result.end.date(), datetime(2013, 2, 27, 23, 5)) 88 | 89 | results = chrono.parse('Test : 2013-2-27 at 9:08 to 11.55') 90 | self.assertEqual(len(results), 1) 91 | 92 | result = results[0] 93 | self.assertEqual(result.index, 7) 94 | self.assertEqual(result.text, '2013-2-27 at 9:08 to 11.55') 95 | self.assertEqual(result.start.get('day'), 27) 96 | self.assertEqual(result.start.get('month'), 2) 97 | self.assertEqual(result.start.get('year'), 2013) 98 | self.assertEqual(result.start.get('hour'), 9) 99 | self.assertEqual(result.start.get('minute'), 8) 100 | self.assertEqual(result.start.date(), datetime(2013, 2, 27, 9, 8)) 101 | 102 | self.assertEqual(result.end.get('day'), 27) 103 | self.assertEqual(result.end.get('month'), 2) 104 | self.assertEqual(result.end.get('year'), 2013) 105 | self.assertEqual(result.end.get('hour'), 11) 106 | 
import unittest

import chrono

from datetime import datetime


class LittleEndianFormatTest(unittest.TestCase):
    """Tests for day-first dates written with a month name ("24 March 2013")."""

    def setUp(self):
        pass

    def _parse_one(self, text, ref_date=None):
        """Parse ``text`` and return its single expected result."""
        if ref_date is None:
            results = chrono.parse(text)
        else:
            results = chrono.parse(text, ref_date)
        self.assertEqual(len(results), 1)
        return results[0]

    def _assert_component(self, component, year, month, day, hour=12):
        """Check day/month/year and the resolved datetime of a component."""
        self.assertEqual(component.get('day'), day)
        self.assertEqual(component.get('month'), month)
        self.assertEqual(component.get('year'), year)
        self.assertEqual(component.date(), datetime(year, month, day, hour))

    def test_little_endian(self):

        result = self._parse_one('Test : 24 March 2013')
        self.assertEqual(result.index, 7)
        self.assertEqual(result.text, '24 March 2013')
        self._assert_component(result.start, 2013, 3, 24)

        result = self._parse_one('Test : 24 Mar 2013')
        self.assertEqual(result.index, 7)
        self.assertEqual(result.text, '24 Mar 2013')
        self._assert_component(result.start, 2013, 3, 24)

        result = self._parse_one('Test : 24 mar 2013')
        self.assertEqual(result.index, 7)
        self.assertEqual(result.text, '24 mar 2013')
        self._assert_component(result.start, 2013, 3, 24)

        # Year missing: taken from the reference date.
        result = self._parse_one('Test : 24 Mar', datetime(2012, 3, 22))
        self.assertEqual(result.index, 7)
        self.assertEqual(result.text, '24 Mar')
        self._assert_component(result.start, 2012, 3, 24)

        # Year missing: the closest 24 March to 2000-10-01 is in 2001.
        result = self._parse_one('Test : 24 March, test',
                                 datetime(2000, 10, 1))
        self.assertEqual(result.index, 7)
        self.assertEqual(result.text, '24 March')
        self._assert_component(result.start, 2001, 3, 24)

    def test_little_endian_range(self):

        result = self._parse_one('Test : 24 - 25 Mar', datetime(2012, 3, 22))
        self.assertEqual(result.index, 7)
        self.assertEqual(result.text, '24 - 25 Mar')
        self._assert_component(result.start, 2012, 3, 24)
        self._assert_component(result.end, 2012, 3, 25)

        result = self._parse_one('Test : 24 - 25 Mar 2014',
                                 datetime(2012, 3, 22))
        self.assertEqual(result.index, 7)
        self.assertEqual(result.text, '24 - 25 Mar 2014')
        self._assert_component(result.start, 2014, 3, 24)
        self._assert_component(result.end, 2014, 3, 25)

        result = self._parse_one('Test : 24 Feb - 2 Mar 2014',
                                 datetime(2012, 3, 22))
        self.assertEqual(result.index, 7)
        self.assertEqual(result.text, '24 Feb - 2 Mar 2014')
        self._assert_component(result.start, 2014, 2, 24)
        self._assert_component(result.end, 2014, 3, 2)

    def test_little_endian_with_time(self):
        result = self._parse_one('Test : 2 Mar 2014 (10.00 - 11.00 AM)',
                                 datetime(2012, 3, 22))
        self.assertEqual(result.index, 7)
        self.assertEqual(result.text, '2 Mar 2014 (10.00 - 11.00 AM)')
        self._assert_component(result.start, 2014, 3, 2, hour=10)
        self._assert_component(result.end, 2014, 3, 2, hour=11)

    def test_little_endian_with_imposible_date(self):
        # assertEqual replaces the assertEquals alias, which was removed in
        # Python 3.12.
        results = chrono.parse("32 August")
        self.assertEqual(len(results), 0)

        results = chrono.parse("32 August 2014")
        self.assertEqual(len(results), 0)

        results = chrono.parse("29 Feb 2014")
        self.assertEqual(len(results), 0)