├── .gitignore ├── .gitmodules ├── .travis.yml ├── LICENSE.md ├── README.md ├── bench.d ├── bench.sh ├── dub.json └── source └── dateparser ├── package.d ├── parseresult.d ├── parserinfo.d ├── splitter.d ├── timelexer.d └── ymd.d /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | .dub/ 3 | test.d 4 | main 5 | test 6 | parser 7 | dateparse 8 | package 9 | parseresult 10 | dateparser 11 | libdateparse.a 12 | libdateparser.a 13 | bench 14 | *.lst 15 | *.def 16 | *.log 17 | trace.html 18 | docdir/ 19 | gh-pages/ 20 | dub.selections.json 21 | *.sublime* 22 | *.ini 23 | docs.json 24 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "gh-pages/bootDoc"] 2 | path = gh-pages/bootDoc 3 | url = git://github.com/JakobOvrum/bootDoc.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | os: 4 | - linux 5 | 6 | language: d 7 | 8 | d: 9 | - dmd 10 | - ldc 11 | 12 | script: 13 | - dub test -b unittest-cov --compiler=${DC} 14 | after_success: 15 | - bash <(curl -s https://codecov.io/bash) 16 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | #date parser license 2 | 3 | Boost Software License - Version 1.0 - August 17th, 2003 4 | 5 | Permission is hereby granted, free of charge, to any person or organization 6 | obtaining a copy of the software and accompanying documentation covered by 7 | this license (the "Software") to use, reproduce, display, distribute, 8 | execute, and transmit the Software, and to prepare derivative works of the 9 | Software, and to permit third-parties to whom the Software is furnished 
to 10 | do so, all subject to the following: 11 | 12 | The copyright notices in the Software and this entire statement, including 13 | the above license grant, this restriction and the following disclaimer, 14 | must be included in all copies of the Software, in whole or in part, and 15 | all derivative works of the Software, unless such copies or derivative 16 | works are solely in the form of machine-executable object code generated by 17 | a source language processor. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 22 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 23 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 24 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | 27 | #dateutil license 28 | 29 | dateutil - Extensions to the standard Python datetime module. 30 | 31 | Copyright (c) 2003-2011 - Gustavo Niemeyer 32 | Copyright (c) 2012-2014 - Tomi Pieviläinen 33 | Copyright (c) 2014 - Yaron de Leeuw 34 | 35 | All rights reserved. 36 | 37 | Redistribution and use in source and binary forms, with or without 38 | modification, are permitted provided that the following conditions are met: 39 | 40 | * Redistributions of source code must retain the above copyright notice, 41 | this list of conditions and the following disclaimer. 42 | * Redistributions in binary form must reproduce the above copyright notice, 43 | this list of conditions and the following disclaimer in the documentation 44 | and/or other materials provided with the distribution. 45 | * Neither the name of the copyright holder nor the names of its 46 | contributors may be used to endorse or promote products derived from 47 | this software without specific prior written permission. 
48 | 49 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 50 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 51 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 52 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 53 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 54 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 55 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 56 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 57 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 58 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 59 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Date Parser 2 | 3 | [![Build Status](https://travis-ci.org/JackStouffer/date-parser.svg?branch=master)](https://travis-ci.org/JackStouffer/date-parser) [![Dub](https://img.shields.io/dub/v/dateparser.svg)](http://code.dlang.org/packages/dateparser) [![codecov](https://codecov.io/gh/JackStouffer/date-parser/branch/master/graph/badge.svg)](https://codecov.io/gh/JackStouffer/date-parser) 4 | 5 | A port of the Python Dateutil date parser. This module offers a generic date/time string parser which is able to parse most known formats to represent a date and/or time. This module attempts to be forgiving with regards to unlikely input formats, returning a `SysTime` object even for dates which are ambiguous. 6 | 7 | Tested with ldc v1.12.0 - v1.13.0 and dmd v2.081.2 - v2.084.0. May work with earlier versions. 8 | 9 | ## Simple Example 10 | 11 | View the docs for more. 
/**
 * Benchmark driver: times `Parser!Mallocator.parse` on five sample date
 * strings and prints the average duration of a single call for each one.
 */
void main()
{
    version (unittest)
    {
        // Nothing to run here: dub.json also names this file as the main
        // source of the `unittest` configuration, so it must still compile
        // to an (empty) entry point in that build.
    }
    else
    {
        auto mallocParser = new Parser!Mallocator(new ParserInfo());

        // benchmark() reports the total time for `testCount` runs of the
        // given function; dividing by `testCount` gives the per-call average.
        auto avgOne = (benchmark!(() => mallocParser.parse(stringOne))(testCount)[0] / testCount).to!Duration;
        auto avgTwo = (benchmark!(() => mallocParser.parse(stringTwo))(testCount)[0] / testCount).to!Duration;
        auto avgThree = (benchmark!(() => mallocParser.parse(stringThree))(testCount)[0] / testCount).to!Duration;
        auto avgFour = (benchmark!(() => mallocParser.parse(stringFour))(testCount)[0] / testCount).to!Duration;
        auto avgFive = (benchmark!(() => mallocParser.parse(stringFive))(testCount)[0] / testCount).to!Duration;

        // Shorter sample strings get extra tabs so the timings line up.
        writeln(stringOne, "\t", avgOne);
        writeln(stringTwo, "\t\t\t", avgTwo);
        writeln(stringThree, "\t\t\t", avgThree);
        writeln(stringFour, "\t", avgFour);
        writeln(stringFive, "\t\t\t", avgFive);
    }
}
docs/" 37 | ] 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /source/dateparser/package.d: -------------------------------------------------------------------------------- 1 | /** 2 | * Boost Software License - Version 1.0 - August 17th, 2003 3 | * 4 | * Permission is hereby granted, free of charge, to any person or organization 5 | * obtaining a copy of the software and accompanying documentation covered by 6 | * this license (the "Software") to use, reproduce, display, distribute, 7 | * execute, and transmit the Software, and to prepare derivative works of the 8 | * Software, and to permit third-parties to whom the Software is furnished to 9 | * do so, all subject to the following: 10 | * 11 | * The copyright notices in the Software and this entire statement, including 12 | * the above license grant, this restriction and the following disclaimer, 13 | * must be included in all copies of the Software, in whole or in part, and 14 | * all derivative works of the Software, unless such copies or derivative 15 | * works are solely in the form of machine-executable object code generated by 16 | * a source language processor. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | * DEALINGS IN THE SOFTWARE. 
/**
 * Parse an `I[.F]` seconds value into (seconds, microseconds).
 *
 * The fractional part is right-padded with zeros to six digits and then cut
 * off after six digits, so at most microsecond precision is read. Digits past
 * the sixth are discarded instead of being folded into the microsecond count
 * (previously `"123.4567891"` produced 4_567_891 and long fractions
 * overflowed `int`).
 *
 * Params:
 *     s = range of characters holding the value to parse, e.g. `"28"` or `"28.5"`
 * Returns:
 *     tuple of two `int`s: whole seconds and microseconds
 * Throws:
 *     `ConvException` if a part cannot be converted to an `int`
 */
auto parseMS(R)(R s) if (
    isForwardRange!R &&
    !isInfinite!R &&
    isSomeChar!(ElementEncodingType!R))
{
    import std.string : leftJustifier;
    import std.algorithm.searching : canFind;
    import std.algorithm.iteration : splitter;
    import std.range : take;
    import std.typecons : tuple;
    import std.conv : parse;
    import std.utf : byCodeUnit;

    // auto decoding special case
    static if (isNarrowString!R)
        auto value = s.byCodeUnit;
    else
        alias value = s;

    if (!(value.save.canFind('.')))
    {
        // No fractional part: the whole value is the seconds count.
        return tuple(parse!int(value), 0);
    }
    else
    {
        auto splitValue = value.splitter('.');
        auto secs = splitValue.front;
        splitValue.popFront();
        // Pad to microsecond width, then cap at it: without the cap a
        // fraction such as "4567891" would be read as 4_567_891 microseconds.
        auto msecs = splitValue.front.leftJustifier(6, '0').take(6);
        return tuple(
            parse!int(secs),
            parse!int(msecs)
        );
    }
}
/**
 * Assign `value` to the member of `p` whose name equals `name`, where the
 * name is only known at run time.
 *
 * Only members whose type `T` implicitly converts to are candidates; the
 * first candidate with a matching name receives the value. If no candidate
 * matches, the function fails with `assert(0)`.
 *
 * Params:
 *     p = aggregate to modify
 *     name = name of the member to set
 *     value = value to store in that member
 */
void setAttribute(P, T)(ref P p, string name, auto ref T value)
{
    foreach (member; __traits(allMembers, P))
    {
        // `is(T : X)` is simply false when `X` is not a valid type, so
        // members that have no usable type are filtered out here as well.
        static if (is(T : typeof(__traits(getMember, p, member))))
        {
            if (name == member)
            {
                __traits(getMember, p, member) = value;
                return;
            }
        }
    }
    assert(0, P.stringof ~ " has no member " ~ name);
}
In order to reduce GC allocations, 158 | use a custom `Parser` instance with a different allocator. 159 | 160 | Unicode_Specifics: 161 | $(OL 162 | $(LI The AA key comparisons done with `ParserInfo` are on a code unit by code 163 | unit basis. As such, if user data passed to this function has a different 164 | normalization than the AAs in the used `ParserInfo` class, then you will 165 | get parser exceptions.) 166 | $(LI While other languages have writing systems without Arabic numerals, 167 | the overwhelming majority of dates are written with them. As such, 168 | this function does not work with other number systems and expects ASCII 169 | numbers.) 170 | ) 171 | 172 | Params: 173 | timeString = A forward range containing a date/time stamp. 174 | ignoreTimezone = Set to false by default. When set to true, time zones in parsed strings are ignored and a 175 | SysTime with the local time zone is returned. If timezone information 176 | is not important, setting this to true is slightly faster. 177 | timezoneInfos = Time zone names / aliases which may be present in the 178 | string. This argument maps time zone names (and optionally offsets 179 | from those time zones) to time zones. This parameter is ignored if 180 | ignoreTimezone is set. 181 | dayFirst = Whether to interpret the first value in an ambiguous 3-integer date 182 | (e.g. 01/05/09) as the day (`true`) or month (`false`). If 183 | yearFirst is set to true, this distinguishes between YDM and 184 | YMD. 185 | yearFirst = Whether to interpret the first value in an ambiguous 3-integer date 186 | (e.g. 01/05/09) as the year. If true, the first number is taken to 187 | be the year, otherwise the last number is taken to be the year. 188 | fuzzy = Whether to allow fuzzy parsing, allowing for strings like "Today is 189 | January 1, 2047 at 8:21:00AM". 190 | defaultDate = The date to apply the given information on top of.
SysTime parse(Range)(Range timeString,
    Flag!"ignoreTimezone" ignoreTimezone = No.ignoreTimezone,
    const(TimeZone)[string] timezoneInfos = null,
    Flag!"dayFirst" dayFirst = No.dayFirst,
    Flag!"yearFirst" yearFirst = No.yearFirst,
    Flag!"fuzzy" fuzzy = No.fuzzy,
    SysTime defaultDate = SysTime(DateTime(1, 1, 1)))
if (isForwardRange!Range
    && !isInfinite!Range
    && isSomeChar!(ElementEncodingType!Range))
in
{
    // `defaultParser` is only assigned in this module's `static this`, so a
    // call made before that constructor has run would see a null parser.
    assert(defaultParser !is null, "Accessing defaultParser before static this initalization. Use your own Parser instance.");
}
do
{
    // dfmt off
    // Pure forwarding: every argument goes to the module-level GC-backed parser unchanged.
    return defaultParser.parse(timeString, ignoreTimezone, timezoneInfos,
        dayFirst, yearFirst, fuzzy, defaultDate);
}
auto customParser = new Parser!Mallocator(new ParserInfo()); 276 | assert(customParser.parse("2003-09-25T10:49:41") == 277 | SysTime(DateTime(2003, 9, 25, 10, 49, 41))); 278 | } 279 | 280 | /// Exceptions 281 | unittest 282 | { 283 | import std.exception : assertThrown; 284 | import std.conv : ConvException; 285 | 286 | assertThrown!ConvException(parse("")); 287 | assertThrown!ConvException(parse("AM")); 288 | assertThrown!ConvException(parse("The quick brown fox jumps over the lazy dog")); 289 | assertThrown!TimeException(parse("Feb 30, 2007")); 290 | assertThrown!TimeException(parse("Jan 20, 2015 PM")); 291 | assertThrown!ConvException(parse("01-Jane-01")); 292 | assertThrown!ConvException(parse("13:44 AM")); 293 | assertThrown!ConvException(parse("January 25, 1921 23:13 PM")); 294 | } 295 | // dfmt on 296 | 297 | unittest 298 | { 299 | assert(parse("Thu Sep 10:36:28") == SysTime(DateTime(1, 9, 5, 10, 36, 28))); 300 | assert(parse("Thu 10:36:28") == SysTime(DateTime(1, 1, 3, 10, 36, 28))); 301 | assert(parse("Sep 10:36:28") == SysTime(DateTime(1, 9, 1, 10, 36, 28))); 302 | assert(parse("Sep 2003") == SysTime(DateTime(2003, 9, 1))); 303 | assert(parse("Sep") == SysTime(DateTime(1, 9, 1))); 304 | assert(parse("2003") == SysTime(DateTime(2003, 1, 1))); 305 | assert(parse("10:36") == SysTime(DateTime(1, 1, 1, 10, 36))); 306 | } 307 | 308 | unittest 309 | { 310 | assert(parse("Thu 10:36:28") == SysTime(DateTime(1, 1, 3, 10, 36, 28))); 311 | assert(parse("20030925T104941") == SysTime(DateTime(2003, 9, 25, 10, 49, 41))); 312 | assert(parse("20030925T1049") == SysTime(DateTime(2003, 9, 25, 10, 49, 0))); 313 | assert(parse("20030925T10") == SysTime(DateTime(2003, 9, 25, 10))); 314 | assert(parse("20030925") == SysTime(DateTime(2003, 9, 25))); 315 | assert(parse("2003-09-25 10:49:41,502") == SysTime(DateTime(2003, 9, 25, 10, 316 | 49, 41), msecs(502))); 317 | assert(parse("199709020908") == SysTime(DateTime(1997, 9, 2, 9, 8))); 318 | assert(parse("19970902090807") == 
SysTime(DateTime(1997, 9, 2, 9, 8, 7))); 319 | } 320 | 321 | unittest 322 | { 323 | assert(parse("2003 09 25") == SysTime(DateTime(2003, 9, 25))); 324 | assert(parse("2003 Sep 25") == SysTime(DateTime(2003, 9, 25))); 325 | assert(parse("25 Sep 2003") == SysTime(DateTime(2003, 9, 25))); 326 | assert(parse("25 Sep 2003") == SysTime(DateTime(2003, 9, 25))); 327 | assert(parse("Sep 25 2003") == SysTime(DateTime(2003, 9, 25))); 328 | assert(parse("09 25 2003") == SysTime(DateTime(2003, 9, 25))); 329 | assert(parse("25 09 2003") == SysTime(DateTime(2003, 9, 25))); 330 | assert(parse("10 09 2003", No.ignoreTimezone, null, 331 | Yes.dayFirst) == SysTime(DateTime(2003, 9, 10))); 332 | assert(parse("10 09 2003") == SysTime(DateTime(2003, 10, 9))); 333 | assert(parse("10 09 03") == SysTime(DateTime(2003, 10, 9))); 334 | assert(parse("10 09 03", No.ignoreTimezone, null, No.dayFirst, 335 | Yes.yearFirst) == SysTime(DateTime(2010, 9, 3))); 336 | assert(parse("25 09 03") == SysTime(DateTime(2003, 9, 25))); 337 | } 338 | 339 | unittest 340 | { 341 | assert(parse("03 25 Sep") == SysTime(DateTime(2003, 9, 25))); 342 | assert(parse("2003 25 Sep") == SysTime(DateTime(2003, 9, 25))); 343 | assert(parse("25 03 Sep") == SysTime(DateTime(2025, 9, 3))); 344 | assert(parse("Thu Sep 25 2003") == SysTime(DateTime(2003, 9, 25))); 345 | assert(parse("Sep 25 2003") == SysTime(DateTime(2003, 9, 25))); 346 | } 347 | 348 | // Naked times 349 | unittest 350 | { 351 | assert(parse("10h36m28.5s") == SysTime(DateTime(1, 1, 1, 10, 36, 28), msecs(500))); 352 | assert(parse("10h36m28s") == SysTime(DateTime(1, 1, 1, 10, 36, 28))); 353 | assert(parse("10h36m") == SysTime(DateTime(1, 1, 1, 10, 36))); 354 | assert(parse("10h") == SysTime(DateTime(1, 1, 1, 10, 0, 0))); 355 | assert(parse("10 h 36") == SysTime(DateTime(1, 1, 1, 10, 36, 0))); 356 | assert(parse("10 hours 36 minutes") == SysTime(DateTime(1, 1, 1, 10, 36, 0))); 357 | } 358 | 359 | // AM vs PM 360 | unittest 361 | { 362 | assert(parse("10h am") == 
SysTime(DateTime(1, 1, 1, 10))); 363 | assert(parse("10h pm") == SysTime(DateTime(1, 1, 1, 22))); 364 | assert(parse("10am") == SysTime(DateTime(1, 1, 1, 10))); 365 | assert(parse("10pm") == SysTime(DateTime(1, 1, 1, 22))); 366 | assert(parse("12 am") == SysTime(DateTime(1, 1, 1, 0, 0))); 367 | assert(parse("12am") == SysTime(DateTime(1, 1, 1, 0, 0))); 368 | assert(parse("11 pm") == SysTime(DateTime(1, 1, 1, 23, 0))); 369 | assert(parse("10:00 am") == SysTime(DateTime(1, 1, 1, 10))); 370 | assert(parse("10:00 pm") == SysTime(DateTime(1, 1, 1, 22))); 371 | assert(parse("10:00am") == SysTime(DateTime(1, 1, 1, 10))); 372 | assert(parse("10:00pm") == SysTime(DateTime(1, 1, 1, 22))); 373 | assert(parse("10:00a.m") == SysTime(DateTime(1, 1, 1, 10))); 374 | assert(parse("10:00p.m") == SysTime(DateTime(1, 1, 1, 22))); 375 | assert(parse("10:00a.m.") == SysTime(DateTime(1, 1, 1, 10))); 376 | assert(parse("10:00p.m.") == SysTime(DateTime(1, 1, 1, 22))); 377 | } 378 | 379 | // ISO and ISO stripped 380 | unittest 381 | { 382 | immutable zone = new SimpleTimeZone(dur!"seconds"(-10_800)); 383 | 384 | immutable parsed = parse("2003-09-25T10:49:41.5-03:00"); 385 | assert(parsed == SysTime(DateTime(2003, 9, 25, 10, 49, 41), msecs(500), zone)); 386 | assert((cast(immutable(SimpleTimeZone)) parsed.timezone).utcOffset == hours(-3)); 387 | 388 | immutable parsed2 = parse("2003-09-25T10:49:41-03:00"); 389 | assert(parsed2 == SysTime(DateTime(2003, 9, 25, 10, 49, 41), zone)); 390 | assert((cast(immutable(SimpleTimeZone)) parsed2.timezone).utcOffset == hours(-3)); 391 | 392 | assert(parse("2003-09-25T10:49:41") == SysTime(DateTime(2003, 9, 25, 10, 49, 41))); 393 | assert(parse("2003-09-25T10:49") == SysTime(DateTime(2003, 9, 25, 10, 49))); 394 | assert(parse("2003-09-25T10") == SysTime(DateTime(2003, 9, 25, 10))); 395 | assert(parse("2003-09-25") == SysTime(DateTime(2003, 9, 25))); 396 | 397 | immutable parsed3 = parse("2003-09-25T10:49:41-03:00"); 398 | assert(parsed3 == 
SysTime(DateTime(2003, 9, 25, 10, 49, 41), zone)); 399 | assert((cast(immutable(SimpleTimeZone)) parsed3.timezone).utcOffset == hours(-3)); 400 | 401 | immutable parsed4 = parse("20030925T104941-0300"); 402 | assert(parsed4 == SysTime(DateTime(2003, 9, 25, 10, 49, 41), zone)); 403 | assert((cast(immutable(SimpleTimeZone)) parsed4.timezone).utcOffset == hours(-3)); 404 | 405 | assert(parse("20030925T104941") == SysTime(DateTime(2003, 9, 25, 10, 49, 41))); 406 | assert(parse("20030925T1049") == SysTime(DateTime(2003, 9, 25, 10, 49, 0))); 407 | assert(parse("20030925T10") == SysTime(DateTime(2003, 9, 25, 10))); 408 | assert(parse("20030925") == SysTime(DateTime(2003, 9, 25))); 409 | } 410 | 411 | // Dashes 412 | unittest 413 | { 414 | assert(parse("2003-09-25") == SysTime(DateTime(2003, 9, 25))); 415 | assert(parse("2003-Sep-25") == SysTime(DateTime(2003, 9, 25))); 416 | assert(parse("25-Sep-2003") == SysTime(DateTime(2003, 9, 25))); 417 | assert(parse("25-Sep-2003") == SysTime(DateTime(2003, 9, 25))); 418 | assert(parse("Sep-25-2003") == SysTime(DateTime(2003, 9, 25))); 419 | assert(parse("09-25-2003") == SysTime(DateTime(2003, 9, 25))); 420 | assert(parse("25-09-2003") == SysTime(DateTime(2003, 9, 25))); 421 | assert(parse("10-09-2003", No.ignoreTimezone, null, 422 | Yes.dayFirst) == SysTime(DateTime(2003, 9, 10))); 423 | assert(parse("10-09-2003") == SysTime(DateTime(2003, 10, 9))); 424 | assert(parse("10-09-03") == SysTime(DateTime(2003, 10, 9))); 425 | assert(parse("10-09-03", No.ignoreTimezone, null, No.dayFirst, 426 | Yes.yearFirst) == SysTime(DateTime(2010, 9, 3))); 427 | assert(parse("01-99") == SysTime(DateTime(1999, 1, 1))); 428 | assert(parse("99-01") == SysTime(DateTime(1999, 1, 1))); 429 | assert(parse("13-01", No.ignoreTimezone, null, Yes.dayFirst) == SysTime(DateTime(1, 430 | 1, 13))); 431 | assert(parse("01-13") == SysTime(DateTime(1, 1, 13))); 432 | assert(parse("01-99-Jan") == SysTime(DateTime(1999, 1, 1))); 433 | } 434 | 435 | // Dots 436 | 
unittest
{
    // Dot-separated dates: the same calendar day written in several
    // year/month/day orders, with numeric and abbreviated month names.
    assert(parse("2003.09.25") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("2003.Sep.25") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("25.Sep.2003") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("25.Sep.2003") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("Sep.25.2003") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("09.25.2003") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("25.09.2003") == SysTime(DateTime(2003, 9, 25)));
    // Ambiguous inputs: month-first is the default, the flags override it.
    assert(parse("10.09.2003", No.ignoreTimezone, null,
        Yes.dayFirst) == SysTime(DateTime(2003, 9, 10)));
    assert(parse("10.09.2003") == SysTime(DateTime(2003, 10, 9)));
    assert(parse("10.09.03") == SysTime(DateTime(2003, 10, 9)));
    assert(parse("10.09.03", No.ignoreTimezone, null, No.dayFirst,
        Yes.yearFirst) == SysTime(DateTime(2010, 9, 3)));
}

// Slashes
unittest
{
    // Same matrix as the dot-separated test above, using '/' as separator.
    assert(parse("2003/09/25") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("2003/Sep/25") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("25/Sep/2003") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("25/Sep/2003") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("Sep/25/2003") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("09/25/2003") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("25/09/2003") == SysTime(DateTime(2003, 9, 25)));
    assert(parse("10/09/2003", No.ignoreTimezone, null,
        Yes.dayFirst) == SysTime(DateTime(2003, 9, 10)));
    assert(parse("10/09/2003") == SysTime(DateTime(2003, 10, 9)));
    assert(parse("10/09/03") == SysTime(DateTime(2003, 10, 9)));
    assert(parse("10/09/03", No.ignoreTimezone, null, No.dayFirst,
        Yes.yearFirst) == SysTime(DateTime(2010, 9, 3)));
}

// Random formats
unittest
{
    // Free-form inputs. Fields missing from the string keep the value of the
    // default date, which is why time-only inputs compare against 0001-01-01.
    assert(parse("Wed, July 10, '96") == SysTime(DateTime(1996, 7, 10, 0, 0)));
    assert(parse("1996.07.10 AD at 15:08:56 PDT",
        Yes.ignoreTimezone) == SysTime(DateTime(1996, 7, 10, 15, 8, 56)));
    assert(parse("1996.July.10 AD 12:08 PM") == SysTime(DateTime(1996, 7, 10, 12, 8)));
    assert(parse("Tuesday, April 12, 1952 AD 3:30:42pm PST",
        Yes.ignoreTimezone) == SysTime(DateTime(1952, 4, 12, 15, 30, 42)));
    assert(parse("November 5, 1994, 8:15:30 am EST",
        Yes.ignoreTimezone) == SysTime(DateTime(1994, 11, 5, 8, 15, 30)));
    assert(parse("1994-11-05T08:15:30-05:00",
        Yes.ignoreTimezone) == SysTime(DateTime(1994, 11, 5, 8, 15, 30)));
    assert(parse("1994-11-05T08:15:30Z",
        Yes.ignoreTimezone) == SysTime(DateTime(1994, 11, 5, 8, 15, 30)));
    assert(parse("July 4, 1976") == SysTime(DateTime(1976, 7, 4)));
    assert(parse("7 4 1976") == SysTime(DateTime(1976, 7, 4)));
    assert(parse("4 jul 1976") == SysTime(DateTime(1976, 7, 4)));
    assert(parse("7-4-76") == SysTime(DateTime(1976, 7, 4)));
    assert(parse("19760704") == SysTime(DateTime(1976, 7, 4)));
    assert(parse("0:01:02") == SysTime(DateTime(1, 1, 1, 0, 1, 2)));
    assert(parse("12h 01m02s am") == SysTime(DateTime(1, 1, 1, 0, 1, 2)));
    assert(parse("0:01:02 on July 4, 1976") == SysTime(DateTime(1976, 7, 4, 0, 1, 2)));
    assert(parse("0:01:02 on July 4, 1976") == SysTime(DateTime(1976, 7, 4, 0, 1, 2)));
    assert(parse("1976-07-04T00:01:02Z",
        Yes.ignoreTimezone) == SysTime(DateTime(1976, 7, 4, 0, 1, 2)));
    assert(parse("July 4, 1976 12:01:02 am") == SysTime(DateTime(1976, 7, 4, 0, 1,
        2)));
    assert(parse("Mon Jan 2 04:24:27 1995") == SysTime(DateTime(1995, 1, 2, 4, 24,
        27)));
    assert(parse("Tue Apr 4 00:22:12 PDT 1995",
        Yes.ignoreTimezone) == SysTime(DateTime(1995, 4, 4, 0, 22, 12)));
    assert(parse("04.04.95 00:22") == SysTime(DateTime(1995, 4, 4, 0, 22)));
    assert(parse("Jan 1 1999 11:23:34.578") == SysTime(DateTime(1999, 1, 1, 11, 23,
        34), msecs(578)));
    assert(parse("950404 122212") == SysTime(DateTime(1995, 4, 4, 12, 22, 12)));
    assert(parse("0:00 PM, PST", Yes.ignoreTimezone) == SysTime(DateTime(1, 1, 1, 12,
        0)));
    assert(parse("12:08 PM") == SysTime(DateTime(1, 1, 1, 12, 8)));
    assert(parse("5:50 A.M. on June 13, 1990") == SysTime(DateTime(1990, 6, 13, 5,
        50)));
    assert(parse("3rd of May 2001") == SysTime(DateTime(2001, 5, 3)));
    assert(parse("5th of March 2001") == SysTime(DateTime(2001, 3, 5)));
    assert(parse("1st of May 2003") == SysTime(DateTime(2003, 5, 1)));
    // h/m/s suffix notation, including units given out of order.
    assert(parse("01h02m03") == SysTime(DateTime(1, 1, 1, 1, 2, 3)));
    assert(parse("01h02") == SysTime(DateTime(1, 1, 1, 1, 2)));
    assert(parse("01h02s") == SysTime(DateTime(1, 1, 1, 1, 0, 2)));
    assert(parse("01m02") == SysTime(DateTime(1, 1, 1, 0, 1, 2)));
    assert(parse("01m02h") == SysTime(DateTime(1, 1, 1, 2, 1)));
    assert(parse("2004 10 Apr 11h30m") == SysTime(DateTime(2004, 4, 10, 11, 30)));
}

// Pertain, weekday, and month
unittest
{
    // "Sep 03" reads 03 as the day; "Sep of 03" reads it as the year.
    assert(parse("Sep 03") == SysTime(DateTime(1, 9, 3)));
    assert(parse("Sep of 03") == SysTime(DateTime(2003, 9, 1)));
    // A bare weekday moves the default date forward to that weekday.
    assert(parse("Wed") == SysTime(DateTime(1, 1, 2)));
    assert(parse("Wednesday") == SysTime(DateTime(1, 1, 2)));
    assert(parse("October") == SysTime(DateTime(1, 10, 1)));
    assert(parse("31-Dec-00") == SysTime(DateTime(2000, 12, 31)));
}

// Fuzzy
unittest
{
    // Sometimes fuzzy parsing results in AM/PM flag being set without
    // hours - if it's fuzzy it should ignore that.
    auto s1 = "I have a meeting on March 1 1974.";
    auto s2 = "On June 8th, 2020, I am going to be the first man on Mars";

    // Also don't want any erroneous AM or PMs changing the parsed time
    auto s3 = "Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003";
    auto s4 = "Meet me at 3:00AM on December 3rd, 2003 at the AM/PM on Sunset";
    auto s5 = "Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.";
    auto s6 = "Jan 29, 1945 14:45 AM I going to see you there?";

    assert(parse(s1, No.ignoreTimezone, null, No.dayFirst, No.yearFirst,
        Yes.fuzzy) == SysTime(DateTime(1974, 3, 1)));
    assert(parse(s2, No.ignoreTimezone, null, No.dayFirst, No.yearFirst,
        Yes.fuzzy) == SysTime(DateTime(2020, 6, 8)));
    assert(parse(s3, No.ignoreTimezone, null, No.dayFirst, No.yearFirst,
        Yes.fuzzy) == SysTime(DateTime(2003, 12, 3, 3)));
    assert(parse(s4, No.ignoreTimezone, null, No.dayFirst, No.yearFirst,
        Yes.fuzzy) == SysTime(DateTime(2003, 12, 3, 3)));

    // The numeric offset in s5 must be carried into the resulting SysTime.
    immutable zone = new SimpleTimeZone(dur!"hours"(-3));
    immutable parsed = parse(s5, No.ignoreTimezone, null, No.dayFirst, No.yearFirst,
        Yes.fuzzy);
    assert(parsed == SysTime(DateTime(2003, 9, 25, 10, 49, 41), zone));

    // 14:45 is not a 12-hour-clock time, so the "AM" is ignored in fuzzy mode.
    assert(parse(s6, No.ignoreTimezone, null, No.dayFirst, No.yearFirst,
        Yes.fuzzy) == SysTime(DateTime(1945, 1, 29, 14, 45)));
}

// dfmt off
/// Custom parser info allows for international time representation
unittest
{
    import std.utf : byChar;

    // Replaces only the month table; every other table keeps its default.
    class RusParserInfo : ParserInfo
    {
        this()
        {
            monthsAA = ParserInfo.convert([
                ["янв", "Январь"],
                ["фев", "Февраль"],
                ["мар", "Март"],
                ["апр", "Апрель"],
                ["май", "Май"],
                ["июн", "Июнь"],
                ["июл", "Июль"],
                ["авг", "Август"],
                ["сен", "Сентябрь"],
                ["окт", "Октябрь"],
                ["ноя", "Ноябрь"],
                ["дек", "Декабрь"]
            ]);
        }
    }

    auto rusParser = new Parser!GCAllocator(new RusParserInfo());
    immutable parsedTime = rusParser.parse("10 Сентябрь 2015 10:20");
    assert(parsedTime == SysTime(DateTime(2015, 9, 10, 10, 20)));

    // Same input, supplied as a char range built from a dstring.
    immutable parsedTime2 = rusParser.parse("10 Сентябрь 2015 10:20"d.byChar);
    assert(parsedTime2 == SysTime(DateTime(2015, 9, 10, 10, 20)));
}
// dfmt on

// Test ranges
unittest
{
    import std.utf : byCodeUnit, byChar;

    // forward ranges
    assert("10h36m28s".byChar.parse == SysTime(
        DateTime(1, 1, 1, 10, 36, 28)));
    assert("Thu Sep 10:36:28".byChar.parse == SysTime(
        DateTime(1, 9, 5, 10, 36, 28)));

    // bidirectional ranges
    assert("2003-09-25T10:49:41".byCodeUnit.parse == SysTime(
        DateTime(2003, 9, 25, 10, 49, 41)));
    assert("Thu Sep 10:36:28".byCodeUnit.parse == SysTime(
        DateTime(1, 9, 5, 10, 36, 28)));
}

// Test different string types
unittest
{
    import std.meta : AliasSeq;
    import std.conv : to;

    // Mutable and immutable arrays of all three character widths.
    alias StringTypes = AliasSeq!(
        char[], string,
        wchar[], wstring,
        dchar[], dstring
    );

    foreach (T; StringTypes)
    {
        assert("10h36m28s".to!T.parse == SysTime(
            DateTime(1, 1, 1, 10, 36, 28)));
        assert("Thu Sep 10:36:28".to!T.parse == SysTime(
            DateTime(1, 9, 5, 10, 36, 28)));
        assert("2003-09-25T10:49:41".to!T.parse == SysTime(
            DateTime(2003, 9, 25, 10, 49, 41)));
        assert("Thu Sep 10:36:28".to!T.parse == SysTime(
            DateTime(1, 9, 5, 10, 36, 28)));
    }
}

// Issue #1
unittest
{
    // A four-digit numeric offset is accepted and dropped with ignoreTimezone.
    assert(parse("Sat, 12 Mar 2016 01:30:59 -0900",
        Yes.ignoreTimezone) == SysTime(DateTime(2016, 3, 12, 01, 30, 59)));
}

/**
 * Implements the parsing functionality for the parse function.
If you are
 * using a custom `ParserInfo` many times in the same program, you can avoid
 * unnecessary allocations by using the `Parser.parse` function directly.
 *
 * Params:
 *     Allocator = the allocator type to use
 *     parserInfo = the parser info to reference when parsing
 */
final class Parser(Allocator) if (
    hasMember!(Allocator, "allocate") && hasMember!(Allocator, "deallocate"))
{
    private const ParserInfo info;

public:
    /**
     * Params:
     *     parserInfo = the lookup tables to parse with; `null` selects a
     *         default-constructed `ParserInfo`
     */
    this(const ParserInfo parserInfo = null)
    {
        if (parserInfo is null)
        {
            info = new ParserInfo();
        }
        else
        {
            info = parserInfo;
        }
    }

    /**
     * This function has the same functionality as the free version of `parse`.
     * The only difference is this will use your custom `ParserInfo` or allocator
     * if provided.
     *
     * Params:
     *     timeString = the text to parse
     *     ignoreTimezone = when set, time zone names and offsets found in the
     *         text are not applied to the result
     *     timezoneInfos = time zones to use for zone names found in the text
     *     dayFirst = read the first number of an ambiguous date as the day
     *     yearFirst = read the first number of an ambiguous date as the year
     *     fuzzy = skip tokens that cannot be interpreted instead of failing
     *     defaultDate = supplies every field that is missing from the text
     *
     * Returns:
     *     `defaultDate` with each parsed field written over it, or the
     *     ISO-extended shortcut result when the whole input matched one.
     *
     * Throws:
     *     `ConvException` when the text is not recognised as a date, contains
     *     no date or time field, or (non-fuzzy) has an AM/PM marker without a
     *     valid 12-hour-clock hour.
     */
    SysTime parse(Range)(Range timeString,
        Flag!"ignoreTimezone" ignoreTimezone = No.ignoreTimezone,
        const(TimeZone)[string] timezoneInfos = null,
        Flag!"dayFirst" dayFirst = No.dayFirst,
        Flag!"yearFirst" yearFirst = No.yearFirst,
        Flag!"fuzzy" fuzzy = No.fuzzy,
        SysTime defaultDate = SysTime(Date(1, 1, 1))) if (
        isForwardRange!Range && !isInfinite!Range && isSomeChar!(ElementEncodingType!Range))
    {
        import std.conv : to, ConvException;

        auto res = parseImpl(timeString, dayFirst, yearFirst, fuzzy);

        if (res.badData)
            throw new ConvException("Unknown string format");

        if (res.year.isNull() && res.month.isNull() && res.day.isNull()
                && res.hour.isNull() && res.minute.isNull()
                && res.second.isNull() && res.weekday.isNull()
                && res.shortcutResult.isNull() && res.shortcutTimeResult.isNull())
            throw new ConvException("String does not contain a date.");

        if (res.shortcutResult.isNull && res.shortcutTimeResult.isNull)
        {
            // NOTE(review): the fields are applied one setter at a time, so
            // with a caller-supplied defaultDate an intermediate combination
            // may be rejected by SysTime even though the final date would be
            // valid - confirm whether custom defaults need an atomic update.
            if (!res.year.isNull)
                defaultDate.year(res.year);

            if (!res.day.isNull)
                defaultDate.day(res.day);

            if (!res.month.isNull)
                defaultDate.month(to!Month(res.month));

            if (!res.hour.isNull)
                defaultDate.hour(res.hour);

            if (!res.minute.isNull)
                defaultDate.minute(res.minute);

            if (!res.second.isNull)
                defaultDate.second(res.second);

            if (!res.microsecond.isNull)
                defaultDate.fracSecs(usecs(res.microsecond));

            // A weekday without an explicit day moves forward to that weekday.
            if (!res.weekday.isNull() && (res.day.isNull || !res.day))
            {
                immutable delta_days = daysToDayOfWeek(
                    defaultDate.dayOfWeek(),
                    to!DayOfWeek(res.weekday)
                );
                defaultDate += dur!"days"(delta_days);
            }
        }
        else if (!res.shortcutTimeResult.isNull)
            defaultDate = SysTime(DateTime(Date(
                defaultDate.year,
                defaultDate.month,
                defaultDate.day,
            ), res.shortcutTimeResult.get()));

        if (!ignoreTimezone)
        {
            if (res.tzname in timezoneInfos)
                defaultDate = defaultDate.toOtherTZ(
                    cast(immutable) timezoneInfos[res.tzname]
                );
            else if (res.tzname.length > 0 && (res.tzname == LocalTime().stdName
                    || res.tzname == LocalTime().dstName))
                defaultDate = SysTime(cast(DateTime) defaultDate);
            else if (!res.tzoffset.isNull && res.tzoffset == 0)
                defaultDate = SysTime(cast(DateTime) defaultDate, cast(immutable) UTC());
            else if (!res.tzoffset.isNull && res.tzoffset != 0)
            {
                defaultDate = SysTime(
                    cast(DateTime) defaultDate,
                    new immutable SimpleTimeZone(dur!"seconds"(res.tzoffset), res.tzname)
                );
            }
        }
        else if (ignoreTimezone && !res.shortcutResult.isNull)
            res.shortcutResult = SysTime(cast(DateTime) res.shortcutResult.get);

        if (!res.shortcutResult.isNull)
            return res.shortcutResult.get;
        else
            return defaultDate;
    }

private:
    /**
     * Private method which performs the heavy lifting of parsing, called from
     * `parse`.
     *
     * The input is split into tokens which are consumed left to right. Tokens
     * that run out unexpectedly (a trailing `:`, date separator or sign) are
     * never indexed past the end; they are left for the generic skip/reject
     * step at the bottom of the loop.
     *
     * Params:
     *     timeString = the string to parse.
     *     dayFirst = Whether to interpret the first value in an ambiguous
     *         3-integer date (e.g. 01/05/09) as the day (true) or month (false). If
     *         yearFirst is set to true, this distinguishes between YDM
     *         and YMD.
     *     yearFirst = Whether to interpret the first value in an ambiguous 3-integer date
     *         (e.g. 01/05/09) as the year. If true, the first number is taken
     *         to be the year, otherwise the last number is taken to be the year.
     *     fuzzy = Whether to allow fuzzy parsing, allowing for string like "Today is
     *         January 1, 2047 at 8:21:00AM".
     *
     * Returns:
     *     the collected fields; `badData` is set when the input is rejected.
     *
     * Throws:
     *     `ConvException` for an AM/PM marker without a usable hour (non-fuzzy)
     *     and for numeric tokens that cannot be converted.
     */
    ParseResult parseImpl(Range)(Range timeString, bool dayFirst = false,
        bool yearFirst = false, bool fuzzy = false) if (isForwardRange!Range
        && !isInfinite!Range && isSomeChar!(ElementEncodingType!Range))
    {
        import std.algorithm.searching : canFind, countUntil;
        import std.algorithm.iteration : filter;
        import std.uni : isUpper;
        import std.ascii : isDigit;
        import std.utf : byCodeUnit, byChar;
        import std.conv : to, ConvException;
        import containers.dynamicarray : DynamicArray;

        ParseResult res;

        DynamicArray!(string, Allocator, true) tokens;

        static if (is(Unqual!(ElementEncodingType!Range) == dchar) ||
            is(Unqual!(ElementEncodingType!Range) == wchar))
        {
            put(tokens, timeString.save.byChar.timeLexer);
        }
        else static if (isSomeString!Range && is(Unqual!(ElementEncodingType!Range) == char))
        {
            put(tokens, timeString.save.byCodeUnit.timeLexer);
        }
        else
        {
            put(tokens, timeString.save.timeLexer);
        }

        debug(dateparser) writeln("tokens: ", tokens[]);

        //year/month/day list
        YMD ymd;

        //Index of the month string in ymd
        ptrdiff_t mstridx = -1;

        immutable size_t tokensLength = tokens.length;
        debug(dateparser) writeln("tokensLength: ", tokensLength);
        uint i = 0;
        while (i < tokensLength)
        {
            //Check if it's a number
            Nullable!(float, float.infinity) value;
            string value_repr;
            debug(dateparser) writeln("index: ", i);
            debug(dateparser) writeln("tokens[i]: ", tokens[i]);

            if (tokens[i][0].isDigit)
            {
                value_repr = tokens[i];
                debug(dateparser) writeln("value_repr: ", value_repr);
                value = to!float(value_repr);
            }

            //Token is a number
            if (!value.isNull())
            {
                immutable tokensItemLength = tokens[i].length;
                ++i;

                if (ymd.length == 3 && (tokensItemLength == 2
                        || tokensItemLength == 4) && res.hour.isNull
                        && (i >= tokensLength || (tokens[i] != ":" && info.hms(tokens[i]) == -1)))
                {
                    debug(dateparser) writeln("branch 1");
                    //19990101T23[59]
                    auto s = tokens[i - 1];
                    res.hour = to!int(s[0 .. 2]);

                    if (tokensItemLength == 4)
                    {
                        res.minute = to!int(s[2 .. $]);
                    }
                }
                else if (tokensItemLength == 6 || (tokensItemLength > 6
                        && tokens[i - 1].countUntil('.') == 6))
                {
                    debug(dateparser) writeln("branch 2");
                    //YYMMDD || HHMMSS[.ss]
                    auto s = tokens[i - 1];

                    if (ymd.length == 0 && !tokens[i - 1].canFind('.'))
                    {
                        ymd.put(s[0 .. 2]);
                        ymd.put(s[2 .. 4]);
                        ymd.put(s[4 .. $]);
                    }
                    else
                    {
                        //19990101T235959[.59]
                        res.hour = to!int(s[0 .. 2]);
                        res.minute = to!int(s[2 .. 4]);
                        auto ms = parseMS(s[4 .. $]);
                        res.second = ms[0];
                        res.microsecond = ms[1];
                    }
                }
                else if (tokensItemLength == 8 || tokensItemLength == 12 || tokensItemLength == 14)
                {
                    debug(dateparser) writeln("branch 3");
                    //YYYYMMDD
                    auto s = tokens[i - 1];
                    ymd.put(s[0 .. 4]);
                    ymd.put(s[4 .. 6]);
                    ymd.put(s[6 .. 8]);

                    if (tokensItemLength > 8)
                    {
                        res.hour = to!int(s[8 .. 10]);
                        res.minute = to!int(s[10 .. 12]);

                        if (tokensItemLength > 12)
                        {
                            res.second = to!int(s[12 .. $]);
                        }
                    }
                }
                else if ((i < tokensLength && info.hms(tokens[i]) > -1)
                        || (i + 1 < tokensLength && tokens[i] == " " && info.hms(tokens[i + 1]) > -1))
                {
                    debug(dateparser) writeln("branch 4");
                    //HH[ ]h or MM[ ]m or SS[.ss][ ]s
                    if (tokens[i] == " ")
                    {
                        ++i;
                    }

                    auto idx = info.hms(tokens[i]);

                    while (true)
                    {
                        if (idx == 0)
                        {
                            res.hour = to!int(value.get());

                            if (value % 1)
                                res.minute = to!int(60 * (value % 1));
                        }
                        else if (idx == 1)
                        {
                            res.minute = to!int(value.get());

                            if (value % 1)
                                res.second = to!int(60 * (value % 1));
                        }
                        else if (idx == 2)
                        {
                            auto temp = parseMS(value_repr);
                            res.second = temp[0];
                            res.microsecond = temp[1];
                        }

                        ++i;

                        if (i >= tokensLength || idx == 2)
                            break;

                        //12h00
                        try
                        {
                            value_repr = tokens[i];
                            value = to!float(value_repr);
                        }
                        catch (ConvException)
                        {
                            break;
                        }

                        ++i;
                        ++idx;

                        if (i < tokensLength)
                        {
                            immutable newidx = info.hms(tokens[i]);

                            if (newidx > -1)
                                idx = newidx;
                        }
                    }
                }
                else if (i == tokensLength && tokensLength > 3
                        && tokens[i - 2] == " " && info.hms(tokens[i - 3]) > -1)
                {
                    debug(dateparser) writeln("branch 5");
                    //X h MM or X m SS
                    immutable idx = info.hms(tokens[i - 3]) + 1;

                    // The seconds case must be a sibling of the minutes case;
                    // nested under it, it could never run and "X m SS" lost
                    // its seconds.
                    if (idx == 1)
                    {
                        res.minute = to!int(value.get());

                        if (value % 1)
                            res.second = to!int(60 * (value % 1));
                    }
                    else if (idx == 2)
                    {
                        auto seconds = parseMS(value_repr);
                        res.second = seconds[0];
                        res.microsecond = seconds[1];
                        ++i;
                    }
                }
                else if (i + 1 < tokensLength && tokens[i] == ":")
                {
                    debug(dateparser) writeln("branch 6");
                    //HH:MM[:SS[.ss]]
                    static if (isSomeString!Range)
                    {
                        if (tokensLength == 5 && info.ampm(tokens[4]) == -1)
                        {
                            try
                            {
                                res.shortcutTimeResult = TimeOfDay.fromISOExtString(timeString);
                                return res;
                            }
                            catch (DateTimeException) {}
                        }
                    }
                    res.hour = to!int(value.get());
                    ++i;
                    value = to!float(tokens[i]);
                    res.minute = to!int(value.get());

                    if (value % 1)
                        res.second = to!int(60 * (value % 1));

                    ++i;

                    // Only consume the second ':' when a seconds token follows.
                    if (i + 1 < tokensLength && tokens[i] == ":")
                    {
                        auto temp = parseMS(tokens[i + 1]);
                        res.second = temp[0];
                        res.microsecond = temp[1];
                        i += 2;
                    }
                }
                else if (i < tokensLength && (tokens[i] == "-" || tokens[i] == "/"
                        || tokens[i] == "."))
                {
                    debug(dateparser) writeln("branch 7");
                    immutable string separator = tokens[i];
                    ymd.put(value_repr);
                    ++i;

                    if (i < tokensLength && !info.jump(tokens[i]))
                    {
                        if (tokens[i][0].isDigit)
                        {
                            //01-01[-01]
                            static if (isSomeString!Range)
                            {
                                if (tokensLength >= 11)
                                {
                                    try
                                    {
                                        res.shortcutResult = SysTime.fromISOExtString(timeString);
                                        return res;
                                    }
                                    catch (DateTimeException) {}
                                }
                            }

                            ymd.put(tokens[i]);
                        }
                        else
                        {
                            //01-Jan[-01]
                            value = info.month(tokens[i]);

                            if (value > -1)
                            {
                                ymd.put(value.get());
                                mstridx = cast(ptrdiff_t) (ymd.length == 0 ? 0 : ymd.length - 1);
                            }
                            else
                            {
                                res.badData = true;
                                return res;
                            }
                        }

                        ++i;

                        // A trailing separator (e.g. "10.09.") has no third
                        // member; leave it to the jump handling below.
                        if (i + 1 < tokensLength && tokens[i] == separator)
                        {
                            //We have three members
                            ++i;
                            value = info.month(tokens[i]);

                            if (value > -1)
                            {
                                ymd.put(value.get());
                                mstridx = ymd.length - 1;
                            }
                            else
                                ymd.put(tokens[i]);

                            ++i;
                        }
                    }
                }
                else if (i >= tokensLength || info.jump(tokens[i]))
                {
                    debug(dateparser) writeln("branch 8");
                    if (i + 1 < tokensLength && info.ampm(tokens[i + 1]) > -1)
                    {
                        //12 am
                        res.hour = to!int(value.get());

                        if (res.hour < 12 && info.ampm(tokens[i + 1]) == 1)
                            res.hour += 12;
                        else if (res.hour == 12 && info.ampm(tokens[i + 1]) == 0)
                            res.hour = 0;

                        ++i;
                    }
                    else
                    {
                        //Year, month or day
                        ymd.put(value.get());
                    }
                    ++i;
                }
                else if (info.ampm(tokens[i]) > -1)
                {
                    debug(dateparser) writeln("branch 9");
                    //12am
                    res.hour = to!int(value.get());

                    if (res.hour < 12 && info.ampm(tokens[i]) == 1)
                        res.hour += 12;
                    else if (res.hour == 12 && info.ampm(tokens[i]) == 0)
                        res.hour = 0;

                    ++i;
                }
                else if (!fuzzy)
                {
                    debug(dateparser) writeln("branch 10");
                    res.badData = true;
                    return res;
                }
                else
                {
                    debug(dateparser) writeln("branch 11");
                    ++i;
                }
                continue;
            }

            //Check weekday
            value = info.weekday(tokens[i]);
            if (value > -1)
            {
                debug(dateparser) writeln("branch 12");
                res.weekday = to!uint(value.get());
                ++i;
                continue;
            }

            //Check month name
            value = info.month(tokens[i]);
            if (value > -1)
            {
                debug(dateparser) writeln("branch 13");

                // A second month name is bad input, not a programming error,
                // so reject it instead of asserting.
                if (mstridx != -1)
                {
                    res.badData = true;
                    return res;
                }

                ymd.put(value.get);
                mstridx = ymd.length - 1;

                ++i;
                if (i < tokensLength)
                {
                    if (i + 1 < tokensLength && (tokens[i] == "-" || tokens[i] == "/"))
                    {
                        //Jan-01[-99]
                        immutable separator = tokens[i];
                        ++i;
                        ymd.put(tokens[i]);
                        ++i;

                        if (i + 1 < tokensLength && tokens[i] == separator)
                        {
                            //Jan-01-99
                            ++i;
                            ymd.put(tokens[i]);
                            ++i;
                        }
                    }
                    else if (i + 3 < tokensLength && tokens[i] == " "
                            && tokens[i + 2] == " " && info.pertain(tokens[i + 1]))
                    {
                        //Jan of 01
                        //In this case, 01 is clearly year
                        try
                        {
                            value = to!int(tokens[i + 3]);
                            //Convert it here to become unambiguous
                            ymd.put(convertYear(value.get.to!int()));
                        }
                        catch (ConvException) {}
                        i += 4;
                    }
                }
                continue;
            }

            //Check am/pm
            value = info.ampm(tokens[i]);
            if (value > -1)
            {
                debug(dateparser) writeln("branch 14");
                //For fuzzy parsing, 'a' or 'am' (both valid English words)
                //may erroneously trigger the AM/PM flag. Deal with that
                //here.
                bool valIsAMPM = true;

                //If there's already an AM/PM flag, this one isn't one.
                if (fuzzy && !res.ampm.isNull())
                    valIsAMPM = false;

                //If AM/PM is found and hour is not, raise a ValueError
                if (res.hour.isNull)
                {
                    if (fuzzy)
                        valIsAMPM = false;
                    else
                        throw new ConvException("No hour specified with AM or PM flag.");
                }
                else if (!(0 <= res.hour && res.hour <= 12))
                {
                    //If AM/PM is found, it's a 12 hour clock, so raise
                    //an error for invalid range
                    if (fuzzy)
                        valIsAMPM = false;
                    else
                        throw new ConvException("Invalid hour specified for 12-hour clock.");
                }

                if (valIsAMPM)
                {
                    if (value == 1 && res.hour < 12)
                        res.hour += 12;
                    else if (value == 0 && res.hour == 12)
                        res.hour = 0;

                    res.ampm = to!uint(value.get());
                }

                ++i;
                continue;
            }

            //Check for a timezone name: a short, all-uppercase token seen
            //after the hour. The count below is of NON-uppercase characters
            //and stops at the first one found.
            immutable nonUpperCount = tokens[i]
                .byCodeUnit
                .filter!(a => !isUpper(a))
                .walkLength(1);
            if (!res.hour.isNull && tokens[i].length <= 5
                    && res.tzname.length == 0 && res.tzoffset.isNull && nonUpperCount == 0)
            {
                debug(dateparser) writeln("branch 15");
                res.tzname = tokens[i];

                ++i;

                //Check for something like GMT+3, or BRST+3. Notice
                //that it doesn't mean "I am 3 hours after GMT", but
                //"my time +3 is GMT". If found, we reverse the
                //logic so that timezone parsing code will get it
                //right.
                if (i < tokensLength && (tokens[i][0] == '+' || tokens[i][0] == '-'))
                {
                    tokens[i] = tokens[i][0] == '+' ? "-" : "+";
                    res.tzoffset = 0;
                    if (info.utczone(res.tzname))
                    {
                        //With something like GMT+3, the timezone
                        //is *not* GMT.
                        res.tzname = [];
                    }
                }

                continue;
            }

            //Check for a numbered timezone. A sign that is the very last
            //token has no offset to read and is handled as a plain token.
            if (!res.hour.isNull && i + 1 < tokensLength
                    && (tokens[i] == "+" || tokens[i] == "-"))
            {
                debug(dateparser) writeln("branch 16");
                immutable int signal = tokens[i][0] == '+' ? 1 : -1;
                ++i;
                immutable size_t tokensItemLength = tokens[i].length;

                if (tokensItemLength == 4)
                {
                    //-0300
                    res.tzoffset = to!int(tokens[i][0 .. 2]) * 3600 + to!int(tokens[i][2 .. $]) * 60;
                }
                else if (i + 2 < tokensLength && tokens[i + 1] == ":")
                {
                    //-03:00
                    res.tzoffset = to!int(tokens[i]) * 3600 + to!int(tokens[i + 2]) * 60;
                    i += 2;
                }
                else if (tokensItemLength <= 2)
                {
                    //-[0]3
                    res.tzoffset = to!int(tokens[i]) * 3600;
                }
                else
                {
                    res.badData = true;
                    return res;
                }
                ++i;

                res.tzoffset *= signal;

                //Look for a timezone name between parenthesis
                if (i + 3 < tokensLength)
                {
                    immutable notUpperItems = tokens[i + 2]
                        .byCodeUnit
                        .filter!(a => !isUpper(a))
                        .walkLength(1);
                    if (info.jump(tokens[i]) && tokens[i + 1] == "("
                            && tokens[i + 3] == ")" && 3 <= tokens[i + 2].length
                            && tokens[i + 2].length <= 5 && notUpperItems == 0)
                    {
                        //-0300 (BRST)
                        res.tzname = tokens[i + 2];
                        i += 4;
                    }
                }
                continue;
            }

            //Check jumps
            if (!(info.jump(tokens[i]) || fuzzy))
            {
                debug(dateparser) writeln("branch 17");
                res.badData = true;
                return res;
            }

            ++i;
        }

        auto ymdResult = ymd.resolveYMD(tokens[], mstridx, yearFirst, dayFirst);

        // year
        if (ymdResult[0] > -1)
        {
            res.year = ymdResult[0];
            res.centurySpecified = ymd.centurySpecified;
        }

        // month
        if (ymdResult[1] > 0)
            res.month = ymdResult[1];

        // day
        if (ymdResult[2] > 0)
            res.day = ymdResult[2];

        info.validate(res);
        return res;
    }
}

--------------------------------------------------------------------------------
/source/dateparser/parseresult.d:
--------------------------------------------------------------------------------
/**
 * Boost Software License - Version 1.0 - August 17th, 2003
 *
 * Permission is hereby granted, free of charge, to any person or organization
 * obtaining a copy of the software and accompanying documentation covered by
 * this license (the "Software") to use, reproduce, display, distribute,
 * execute, and transmit the Software, and to prepare derivative works of the
 * Software, and to permit third-parties to whom the Software is furnished to
 * do so, all subject to the following:
 *
 * The copyright notices in the Software and this entire statement, including
 * the above license grant, this restriction and the following disclaimer,
 * must be included in all copies of the Software, in whole or in part, and
 * all derivative works of the Software, unless such copies or derivative
 * works are solely in the form of machine-executable object code generated by
 * a source language processor.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
*/

module dateparser.parseresult;

import std.datetime;
import std.typecons;

package:

/**
 * Intermediate result produced by `Parser.parseImpl` and consumed by
 * `Parser.parse`. The `int` fields use `int.min` as their "not set" value.
 */
struct ParseResult
{
    /// Complete result when the whole input parsed as an ISO extended date-time
    Nullable!(SysTime) shortcutResult;
    /// Time of day when the whole input parsed as an ISO extended time
    Nullable!(TimeOfDay) shortcutTimeResult;
    /// Time zone name found in the input; empty when none was found
    string tzname;
    Nullable!(int, int.min) year;
    /// Converted with `to!Month` by `Parser.parse`
    Nullable!(int, int.min) month;
    Nullable!(int, int.min) day;
    /// Converted with `to!DayOfWeek` by `Parser.parse`
    Nullable!(int, int.min) weekday;
    Nullable!(int, int.min) hour;
    Nullable!(int, int.min) minute;
    Nullable!(int, int.min) second;
    Nullable!(int, int.min) microsecond;
    /// UTC offset in seconds
    Nullable!(int, int.min) tzoffset;
    /// Value looked up from the AM/PM table for the accepted AM/PM marker
    Nullable!(int, int.min) ampm;
    /// Passed to `convertYear`; when set, a year below 100 is not expanded
    bool centurySpecified;
    /// Set when the input is rejected; `Parser.parse` then throws
    bool badData = false;
}

--------------------------------------------------------------------------------
/source/dateparser/parserinfo.d:
--------------------------------------------------------------------------------
/**
 * Boost Software License - Version 1.0 - August 17th, 2003
 *
 * Permission is hereby granted, free of charge, to any person or organization
 * obtaining a copy of the software and accompanying documentation covered by
 * this license (the "Software") to use, reproduce, display, distribute,
 * execute, and transmit the Software, and to prepare derivative works of the
 * Software, and to permit third-parties to whom the Software is furnished to
 * do so, all subject to the following:
 *
 * The copyright notices in the Software and this entire statement, including
 * the above license grant, this restriction and the following disclaimer,
 * must be included in all copies of the Software, in whole or in part, and
 * all derivative works of the Software, unless such copies or derivative
 * works are solely in the form of machine-executable object code generated by
 * a source language processor.
*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

module dateparser.parserinfo;

import std.traits;
import std.range;
import dateparser.parseresult;

package:

// dfmt off
// Tokens the parser may skip over.
// m from a.m/p.m, t from ISO T separator, order doesn't
// matter here, just a presence check
enum jumpDefault = [
    "and":9, "'":6,
    "at":7, "/":5,
    "st":14,
    ";":3, " ":0,
    "of":13, "nd":15,
    "rd":16, ".":1,
    "th":17, "on":8,
    "m":11, ",":2,
    "ad":10, "-":4, "t":12
];
// Weekday names, short and long, numbered from Monday = 0.
enum weekdaysDefault = [
    "mon":0, "monday":0,
    "tue":1, "tuesday":1,
    "wed":2, "wednesday":2,
    "thu":3, "thursday":3,
    "fri":4, "friday":4,
    "sat":5, "saturday":5,
    "sun":6, "sunday":6,
];
// Month names, short and long, numbered from January = 0.
// NOTE(review): presumably the lookup adds 1 to get the calendar month - confirm.
enum monthsDefault = [
    "jan":0, "january":0,
    "feb":1, "february":1,
    "mar":2, "march":2,
    "apr":3, "april":3,
    "may":4,
    "jun":5, "june":5,
    "jul":6, "july":6,
    "aug":7, "august":7,
    "sep":8, "sept":8, "september":8,
    "oct":9, "october":9,
    "nov":10, "november":10,
    "dec":11, "december":11
];
// Unit suffixes: 0 = hours, 1 = minutes, 2 = seconds.
enum hmsDefault = [
    "h":0, "hour":0, "hours":0,
    "m":1, "minute":1, "minutes":1,
    "s":2, "second":2, "seconds":2
];
// 12-hour clock markers: 0 = AM, 1 = PM.
enum ampmDefault = [
    "am":0, "a":0,
    "pm":1, "p":1
];
// Names that denote UTC.
enum utcDefault = [
    "UTC":0, "GMT":0, "Z":0
];
// Words that attach a following number to a month name ("Sep of 03").
enum pertainDefault = [
    "of":0
];
// dfmt on

/**
 * If the century isn't
specified, e.g. `"'07"`, then assume that the year
 * is in the current century and return it as such. Otherwise do nothing
 *
 * A two-digit year is first placed in the current century; if that lands 50
 * or more years away from the current year it is moved one century towards
 * the present.
 *
 * Params:
 *     convertYear = year to be converted
 *     centurySpecified = is the century given in the year
 *
 * Returns:
 *     the converted year
 */
int convertYear(int convertYear, bool centurySpecified = false) @safe
{
    import std.math : abs;
    import std.datetime : Clock;

    // Nothing to expand: the century is known or the year has 3+ digits.
    if (centurySpecified || convertYear >= 100)
        return convertYear;

    immutable thisYear = Clock.currTime.year;
    immutable centuryBase = thisYear - thisYear % 100;
    immutable candidate = convertYear + centuryBase;

    if (abs(candidate - thisYear) < 50)
        return candidate;

    // Too far from today: pick the neighbouring century instead.
    return candidate < thisYear ? candidate + 100 : candidate - 100;
}

public:

/**
Class which handles what inputs are accepted. Subclass this to customize
the language and acceptable values for each parameter.

Params:
    dayFirst = Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (`true`) or month (`false`). If
        `yearFirst` is set to `true`, this distinguishes between YDM
        and YMD. Default is `false`.
    yearFirst = Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If `true`, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is `false`.
*/
class ParserInfo
{
private:
    bool dayFirst;
    bool yearFirst;

package:
    /**
     * Takes a `ParseResult`, converts its year with `convertYear` and
     * normalizes the time zone fields when the zone is UTC.
     *
     * Params:
     *     res = the parse result to adjust in place
     */
    final void validate(ref ParseResult res) @safe const
    {
        //move to info
        if (!res.year.isNull)
            res.year = convertYear(res.year, res.centurySpecified);

        // A zero offset with no name (or the name "Z") is reported as UTC.
        if ((!res.tzoffset.isNull && res.tzoffset == 0)
                && (res.tzname.length == 0 || res.tzname == "Z"))
        {
            res.tzname = "UTC";
            res.tzoffset = 0;
        }
        // A known UTC zone name overrides a non-zero offset.
        else if (!res.tzoffset.isNull && res.tzoffset != 0 && res.tzname.length > 0
                && this.utczone(res.tzname))
            res.tzoffset = 0;
    }

public:
    /**
     * AAs used for matching strings to calendar numbers, e.g. Jan is 1.
     *
     * `jumpAA`, `utczoneAA`, and `pertainAA` are only used to check the
     * presence of a key; the value of the key doesn't matter.
     */
    int[string] jumpAA;
    ///ditto
    int[string] weekdaysAA;
    ///ditto
    int[string] monthsAA;
    ///ditto
    int[string] hmsAA;
    ///ditto
    int[string] ampmAA;
    ///ditto
    int[string] utczoneAA;
    ///ditto
    int[string] pertainAA;

    /**
     * Take a range of character ranges or a range of ranges of character
     * ranges and converts it to an associative array that the internal
     * parser info methods can use.
     *
     * Every key is stored lower-cased and its value is the index of the
     * element (or of the enclosing group of elements) in `list`.
     *
     * Use this method in order to override the default parser info field
     * values. See the example on the $(REF parse).
     *
     * Params:
     *     list = a range of character ranges
     *
     * Returns:
     *     An associative array of `int`s accessed by strings
     */
    static int[string] convert(Range)(Range list) if (isInputRange!Range
            && isSomeChar!(ElementEncodingType!(ElementEncodingType!(Range)))
            || isSomeChar!(
            ElementEncodingType!(ElementEncodingType!(ElementEncodingType!(Range)))))
    {
        import std.array : array;
        import std.conv : to;
        import std.uni : asLowerCase;

        int[string] dictionary;

        foreach (i, value; list)
        {
            // tuple of strings or multidimensional string array
            static if (isInputRange!(ElementType!(ElementType!(Range))))
                foreach (item; value)
                    dictionary[item.asLowerCase.array.to!string] = cast(int) i;
            else
                dictionary[value.asLowerCase.array.to!string] = cast(int) i;
        }

        return dictionary;
    }

    /**
     * Ctor
     *
     * Stores the two ordering flags and fills every lookup table with its
     * default value.
     *
     * Params:
     *     dayFirst = see the class documentation
     *     yearFirst = see the class documentation
     */
    this(bool dayFirst = false, bool yearFirst = false) @safe
    {
        // `this.` is required: the parameters shadow the fields, so a bare
        // `dayFirst = dayFirst` only assigns the parameter to itself.
        this.dayFirst = dayFirst;
        this.yearFirst = yearFirst;

        jumpAA = jumpDefault;
        weekdaysAA = weekdaysDefault;
        monthsAA = monthsDefault;
        hmsAA = hmsDefault;
        ampmAA = ampmDefault;
        utczoneAA = utcDefault;
        pertainAA = pertainDefault;
    }

    /// Tests for presence of `name` in each of the AAs
    final bool jump(S)(const S name) const if (isSomeString!S)
    {
        import std.uni : toLower;

        return (name.toLower() in jumpAA) !is null;
    }

    /// ditto
    final int weekday(S)(const S name) const if (isSomeString!S)
    {
        import std.uni : toLower;

        // `in` yields a pointer to the value, so one lookup is enough
        if (auto found = name.toLower() in weekdaysAA)
            return *found;

        return -1;
    }

    /// ditto
    final int month(S)(const S name) const if (isSomeString!S)
    {
        import std.uni : toLower;

        // months are stored zero based, callers get 1 .. 12
        if (auto found = name.toLower() in monthsAA)
            return *found + 1;

        return -1;
    }

    /// ditto
    final int hms(S)(const S name) const if (isSomeString!S)
    {
        import std.uni : toLower;

        if (auto found = name.toLower() in hmsAA)
            return *found;

        return -1;
    }

    /// ditto
    final int ampm(S)(const S name) const if (isSomeString!S)
    {
        import std.uni : toLower;

        if (auto found = name.toLower() in ampmAA)
            return *found;

        return -1;
    }

    /// ditto
    final bool pertain(S)(const S name) const if (isSomeString!S)
    {
        import std.uni : toLower;

        return (name.toLower() in pertainAA) !is null;
    }

    /// ditto
    final bool utczone(S)(const S name) const if (isSomeString!S)
    {
        import std.uni : toLower;

        return (name.toLower() in utczoneAA) !is null;
    }
}
--------------------------------------------------------------------------------
/source/dateparser/splitter.d:
--------------------------------------------------------------------------------
/**
 * Boost Software License - Version 1.0 - August 17th, 2003
 *
 * Permission is hereby granted, free of charge, to any person or organization
 * obtaining a copy of the software and accompanying documentation covered by
 * this license (the "Software") to use, reproduce, display, distribute,
 * execute, and transmit the Software, and to prepare derivative works of the
 * Software, and to permit third-parties to whom the Software is furnished to
 * do so, all subject to the following:
 *
 * The copyright notices in the Software and this entire statement, including
 * the above license grant, this restriction and the following disclaimer,
 * must be included in all copies of the Software, in whole or in part, and
 * all derivative works of the Software, unless such copies or derivative
 * works are solely in the form of machine-executable object code generated by
 * a source language processor.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
*/

module dateparser.splitter;

debug(dateparser) import std.stdio;
import std.compiler;
import std.regex;
import std.traits;
import std.range;

// Compatibility patch for version 2.071 and earlier
// To be removed when support is dropped for 2.071
static if (version_major == 2 && version_minor < 72)
{
    /**
     * Split the given string on `pat`, but keep the matches in the final result.
     *
     * Params:
     *     r = the string to be split
     *     pat = the regex pattern
     * Returns:
     *     A forward range of strings
     */
    package auto splitterWithMatches(Range, RegEx)(Range r, RegEx pat) if (
        is(Unqual!(ElementEncodingType!Range) : dchar))
    {
        return SplitterResult!(Range, RegEx)(r, pat);
    }

    // Issue 15831: This should be a Voldemort type, but due to linker slowdown
    // it's a good idea to put this outside so we don't slowdown people's build
    // times
    package static struct SplitterResult(Range, alias RegEx = Regex)
    {
    private:
        Range _input;
        // Start of the piece currently exposed by `front`
        size_t _offset;
        // `true` while `front` is the separator itself rather than the text before it
        bool onMatch = false;
        alias Rx = typeof(match(Range.init, RegEx.init));
        Rx _match;

        @trusted this(Range input, RegEx separator)
        {
            _input = input;

            if (!_input.empty)
                _match = Rx(_input, separator);
            else
                _offset = 1; // nothing to match at all, make _offset > 0
        }

    public:
        auto ref opSlice()
        {
            return this.save;
        }

        /// Forward range primitives.
        @property Range front()
        {
            import std.algorithm : min;

            assert(!empty && _offset <= _match.pre.length && _match.pre.length <= _input.length);

            // Either the separator that was just reached ...
            if (onMatch)
                return _match.hit();

            // ... or the text between the current offset and the next separator.
            return _input[_offset .. min($, _match.pre.length)];
        }

        /// ditto
        @property bool empty()
        {
            return _offset >= _input.length;
        }

        /// ditto
        void popFront()
        {
            assert(!empty);

            if (_match.empty)
            {
                // No more separators, work is done here
                _offset = _input.length + 1;
                return;
            }

            if (onMatch)
            {
                // The separator has been consumed, move on to the next match
                onMatch = false;
                _offset += _match.hit.length;
                _match.popFront();
            }
            else
            {
                // The text piece has been consumed, expose the separator next
                _offset = _match.pre.length;
                onMatch = true;
            }
        }

        /// ditto
        @property auto save()
        {
            return this;
        }
    }
}
else
{
    import std.typecons;

    // Newer Phobos versions can keep the separators themselves
    package auto splitterWithMatches(Range, RegEx)(Range r, RegEx pat)
    {
        return splitter!(Yes.keepSeparators)(r, pat);
    }
}

unittest
{
    import std.algorithm.comparison : equal;

    auto dateParts = "2003.04.05".splitterWithMatches(regex(`([\.,])`, "g"));
    assert(dateParts.equal(["2003", ".", "04", ".", "05"]));

    auto timeParts = "10:00a.m.".splitterWithMatches(regex(`([\.,])`, "g"));
    assert(timeParts.equal(["10:00a", ".", "m", "."]));
}
--------------------------------------------------------------------------------
/source/dateparser/timelexer.d:
--------------------------------------------------------------------------------
/**
 * Boost Software License - Version 1.0 - August 17th, 2003
 *
 * Permission is hereby granted, free of charge, to any person or organization
 * obtaining a copy of the software and accompanying documentation covered by
 * this license (the "Software") to use, reproduce, display, distribute,
 * execute, and transmit the Software, and to prepare derivative works of the
 * Software, and to permit third-parties to whom the Software is furnished to
 * do so, all subject to the following:
 *
 * The copyright notices in the Software and this entire statement, including
 * the above license grant, this restriction and the following disclaimer,
 * must be included in all copies of the Software, in whole or in part, and
 * all derivative works of the Software, unless such copies or derivative
 * works are solely in the form of machine-executable object code generated by
 * a source language processor.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
*/

module dateparser.timelexer;

debug(dateparser) import std.stdio;
import std.range;
import std.traits;
import std.regex;
import dateparser.splitter;

// States of the lexer in `TimeLexerResult.popFront`. `PERIOD` is declared but
// never assigned by the lexer in this module.
private enum State
{
    EMPTY,
    ALPHA,
    NUMERIC,
    ALPHA_PERIOD,
    PERIOD,
    NUMERIC_PERIOD
}

package:

// Needs to be explicitly flagged global for the backwards compatible
// version of splitterWithMatches
enum split_decimal = ctRegex!(`([\.,])`, "g");

/**
 * This function breaks the time string into lexical units (tokens), which
 * can be parsed by the parser. Lexical units are demarcated by changes in
 * the character set, so any continuous string of letters is considered
 * one unit, any continuous string of numbers is considered one unit.
 *
 * The main complication arises from the fact that dots ('.') can be used
 * both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
 * "4:30:21.447"). As such, it is necessary to read the full context of
 * any dot-separated strings before breaking it into tokens; as such, this
 * function maintains a "token stack", for when the ambiguous context
 * demands that multiple tokens be parsed at once.
 *
 * Params:
 *     r = the range to parse
 * Returns:
 *     an input range of strings
 */
auto timeLexer(Range)(Range r) if (isInputRange!Range && is(Unqual!(ElementType!Range) == char))
{
    return TimeLexerResult!Range(r);
}

// Issue 15831: This should be a Voldemort type, but due to linker slowdown
// it's a good idea to put this outside so we don't slowdown people's build
// times
struct TimeLexerResult(Range)
{
private:
    Range source;
    // Characters read from `source` that belong to the next token
    string charStack;
    // Tokens produced by splitting a dotted token, waiting to be emitted
    string[] tokenStack;
    // The token currently exposed by `front`
    string token;

public:
    /// Stores the source range and reads the first token.
    this(Range r)
    {
        source = r;
        popFront;
    }

    /// The current token.
    auto front() @property
    {
        return token;
    }

    /**
     * Reads the next token. Pending tokens on the token stack are emitted
     * first; otherwise characters are consumed from the char stack and the
     * source until the current token ends.
     */
    void popFront()
    {
        import std.utf : byCodeUnit;
        import std.algorithm.searching : canFind, count;
        import std.uni : isAlpha;
        import std.ascii : isDigit;

        if (tokenStack.length > 0)
        {
            immutable f = tokenStack.front;
            tokenStack.popFront;
            token = f;
            return;
        }

        bool seenLetters = false;
        State state = State.EMPTY;
        token = string.init;

        while (!source.empty || !charStack.empty)
        {
            // We only realize that we've reached the end of a token when we
            // find a character that's not part of the current token - since
            // that character may be part of the next token, it's stored in the
            // charStack.
            char nextChar;

            if (!charStack.empty)
            {
                nextChar = charStack[0];
                charStack = charStack[1 .. $];
            }
            else
            {
                nextChar = source.front;
                source.popFront;
            }

            if (state == State.EMPTY)
            {
                debug(dateparser) writeln("EMPTY");
                // First character of the token - determines if we're starting
                // to parse a word, a number or something else.
                token ~= nextChar;

                if (nextChar.isAlpha)
                    state = State.ALPHA;
                else if (nextChar.isDigit)
                    state = State.NUMERIC;
                else if (nextChar == ' ')
                {
                    token = " ";
                    break; //emit token
                }
                else
                    break; //emit token
                debug(dateparser) writeln("TOKEN ", token, " STATE ", state);
            }
            else if (state == State.ALPHA)
            {
                debug(dateparser) writeln("STATE ", state, " nextChar: ", nextChar);
                // If we've already started reading a word, we keep reading
                // letters until we find something that's not part of a word.
                seenLetters = true;

                if (nextChar != '.' && nextChar != ',' &&
                    nextChar != '/' && nextChar != '-' &&
                    nextChar != '+' && nextChar != ' ' &&
                    !nextChar.isDigit)
                {
                    token ~= nextChar;
                }
                else if (nextChar == '.')
                {
                    token ~= nextChar;
                    state = State.ALPHA_PERIOD;
                }
                else
                {
                    charStack ~= nextChar;
                    break; //emit token
                }
            }
            else if (state == State.NUMERIC)
            {
                // If we've already started reading a number, we keep reading
                // numbers until we find something that doesn't fit.
                debug(dateparser) writeln("STATE ", state, " nextChar: ", nextChar);
                if (nextChar.isDigit)
                    token ~= nextChar;
                else if (nextChar == '.' || (nextChar == ',' && token.length >= 2))
                {
                    token ~= nextChar;
                    state = State.NUMERIC_PERIOD;
                }
                else
                {
                    charStack ~= nextChar;
                    debug(dateparser) writeln("charStack add: ", charStack);
                    break; //emit token
                }
            }
            else if (state == State.ALPHA_PERIOD)
            {
                debug(dateparser) writeln("STATE ", state, " nextChar: ", nextChar);
                // If we've seen some letters and a dot separator, continue
                // parsing, and the tokens will be broken up later.
                seenLetters = true;
                if (nextChar == '.' || nextChar.isAlpha)
                {
                    token ~= nextChar;
                }
                else if (nextChar.isDigit && token[$ - 1] == '.')
                {
                    token ~= nextChar;
                    state = State.NUMERIC_PERIOD;
                }
                else
                {
                    charStack ~= nextChar;
                    break; //emit token
                }
            }
            else if (state == State.NUMERIC_PERIOD)
            {
                debug(dateparser) writeln("STATE ", state, " nextChar: ", nextChar);
                // If we've seen at least one dot separator, keep going, we'll
                // break up the tokens later.
                if (nextChar == '.' || nextChar.isDigit)
                    token ~= nextChar;
                else if (nextChar.isAlpha && token[$ - 1] == '.')
                {
                    token ~= nextChar;
                    state = State.ALPHA_PERIOD;
                }
                else
                {
                    charStack ~= nextChar;
                    break; //emit token
                }
            }
        }

        debug(dateparser) writeln("STATE ", state, " seenLetters: ", seenLetters);
        // A dotted token is a list of separate tokens (not a decimal number)
        // when it contains letters, more than one dot, or ends in a separator.
        // Split it, emit the first piece now and queue the rest.
        if ((state == State.ALPHA_PERIOD || state == State.NUMERIC_PERIOD)
                && (seenLetters || token.byCodeUnit.count('.') > 1
                || (token[$ - 1] == '.' || token[$ - 1] == ',')))
        {
            auto l = splitterWithMatches(token[], split_decimal);
            token = l.front;
            l.popFront;

            foreach (tok; l)
                if (tok.length > 0)
                    tokenStack ~= tok;
        }

        // A number that uses a comma as its decimal separator is normalized
        // to use a dot.
        if (state == State.NUMERIC_PERIOD && !token.byCodeUnit.canFind('.'))
            token = token.replace(",", ".");
    }

    /// `true` once there is no current token and nothing is left to read.
    bool empty() @nogc @safe @property nothrow pure
    {
        return token.empty && source.empty && charStack.empty && tokenStack.empty;
    }
}

unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byCodeUnit;

    assert("Thu Sep 25 10:36:28 BRST 2003".byCodeUnit.timeLexer.equal(
        ["Thu", " ", "Sep", " ", "25", " ",
         "10", ":", "36", ":", "28", " ",
         "BRST", " ", "2003"]
    ));

    assert("2003-09-25T10:49:41.5-03:00".byCodeUnit.timeLexer.equal(
        ["2003", "-", "09", "-", "25", "T",
         "10", ":", "49", ":", "41.5", "-",
         "03", ":", "00"]
    ));
}

unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;

    assert("10:10"
        .byChar
        .timeLexer
        .equal(["10", ":", "10"]));
    assert("Thu Sep 10:36:28"
        .byChar
        .timeLexer
        .equal(["Thu", " ", "Sep", " ", "10", ":", "36", ":", "28"]));
}
--------------------------------------------------------------------------------
/source/dateparser/ymd.d:
--------------------------------------------------------------------------------
/**
 * Boost Software License - Version 1.0 - August 17th, 2003
 *
 * Permission is hereby granted, free of charge, to any person or organization
 * obtaining a copy of the software and accompanying documentation covered by
 * this license (the "Software") to use, reproduce, display, distribute,
 * execute, and transmit the Software, and to prepare derivative works of the
 * Software, and to permit third-parties to
whom the Software is furnished to 9 | * do so, all subject to the following: 10 | * 11 | * The copyright notices in the Software and this entire statement, including 12 | * the above license grant, this restriction and the following disclaimer, 13 | * must be included in all copies of the Software, in whole or in part, and 14 | * all derivative works of the Software, unless such copies or derivative 15 | * works are solely in the form of machine-executable object code generated by 16 | * a source language processor. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | * DEALINGS IN THE SOFTWARE. 
*/

module dateparser.ymd;

debug(dateparser) import std.stdio;
import std.traits;
import std.range;

package:

/// Collects up to three numbers of a date and decides which one is the
/// year, the month and the day.
struct YMD
{
private:
    bool century_specified = false;
    // The collected values, in the order they were put
    int[3] data;
    // Number of values stored in `data` so far
    byte dataPosition;

public:
    /**
     * Checks whether `token` could be the text that `year` was parsed from.
     *
     * Params:
     *     token = a token of the original date string
     *     year = the number to compare against
     * Returns:
     *     `true` if `token` starts with a digit and, once its leading zeros
     *     are removed, equals the decimal representation of `year`. An empty
     *     token never matches.
     */
    static bool couldBeYear(Range, N)(Range token, N year) if (isInputRange!Range
            && isSomeChar!(ElementEncodingType!Range) && is(NumericTypeOf!N : int))
    {
        import std.algorithm.comparison : equal;
        import std.algorithm.mutation : stripLeft;
        import std.ascii : isDigit;
        import std.conv : toChars;

        // `front` must not be called on an empty token
        if (token.empty || !token.front.isDigit)
            return false;

        return year.toChars.equal(token.stripLeft('0'));
    }

    /**
     * Attempt to deduce if a pre 100 year was lost due to padded zeros being
     * taken off
     *
     * A stored value is considered the probable year when exactly one token
     * could be its source and that token is longer than two characters.
     *
     * Params:
     *     tokens = a range of tokens
     * Returns:
     *     the index of the year token. If no probable result was found, then -1
     *     is returned
     */
    int probableYearIndex(Range)(Range tokens) const if (isInputRange!Range
            && isNarrowString!(ElementType!(Range)))
    {
        import std.algorithm.iteration : filter;
        import std.range : walkLength;

        foreach (index, ref item; data[])
        {
            auto potentialYearTokens = tokens.filter!(a => YMD.couldBeYear(a, item));

            // No token matches this value (e.g. a stored 0 that came from
            // "00"), so there is no front to inspect.
            if (potentialYearTokens.empty)
                continue;

            immutable frontLength = potentialYearTokens.front.length;
            // Only "exactly one" matters, so stop counting after two
            immutable length = potentialYearTokens.walkLength(2);

            if (length == 1 && frontLength > 2)
                return cast(int) index;
        }

        return -1;
    }

    /// Put a value in that represents a year, month, or day
    void put(N)(N val) if (isNumeric!N)
    in
    {
        // `data` holds three values, so there must be a free slot left
        assert(dataPosition < 3);
    }
    do
    {
        static if (is(N : int))
        {
            if (val > 100)
                this.century_specified = true;

            data[dataPosition] = val;
            ++dataPosition;
        }
        else
            put(cast(int) val);
    }

    /// ditto
    void put(S)(const S val) if (isSomeString!S)
    in
    {
        // `data` holds three values, so there must be a free slot left
        assert(dataPosition < 3);
    }
    do
    {
        import std.conv : to;

        data[dataPosition] = to!int(val);
        ++dataPosition;

        // More than two characters means the century was written out
        if (val.length > 2)
            this.century_specified = true;
    }

    /// length getter
    size_t length() @property const @safe pure nothrow @nogc
    {
        return dataPosition;
    }

    /// century_specified getter
    bool centurySpecified() @property const @safe pure nothrow @nogc
    {
        return century_specified;
    }

    /**
     * Turns the array of ints into a `Tuple` of three, representing the year,
     * month, and day.
     *
     * Params:
     *     tokens = the tokens of the original string, passed to
     *         `probableYearIndex` when three plain numbers were given
     *     mstridx = The index of the month in the data, or -1 if no month
     *         was given as a string
     *     yearfirst = if the year is first in the string
     *     dayfirst = if the day is first in the string
     * Returns:
     *     tuple of three ints in the order year, month, day. Values that
     *     could not be determined keep their initial value (-1 for the year,
     *     0 for month and day).
     */
    auto resolveYMD(R, N)(R tokens, N mstridx, bool yearfirst, bool dayfirst) if (is(NumericTypeOf!N : size_t))
    {
        import std.typecons : tuple;

        int year = -1;
        int month;
        int day;

        if (dataPosition == 1 || (mstridx != -1 && dataPosition == 2)) //One member, or two members with a month string
        {
            if (mstridx != -1)
            {
                month = data[mstridx];
                // Remove the month from the data by shifting the rest down
                switch (mstridx)
                {
                    case 0:
                        data[0] = data[1];
                        data[1] = data[2];
                        data[2] = 0;
                        break;
                    case 1:
                        data[1] = data[2];
                        data[2] = 0;
                        break;
                    case 2:
                        data[2] = 0;
                        break;
                    default: break;
                }
            }

            if (dataPosition > 1 || mstridx == -1)
            {
                if (data[0] > 31)
                    year = data[0];
                else
                    day = data[0];
            }
        }
        else if (dataPosition == 2) //Two members with numbers
        {
            if (data[0] > 31)
            {
                //99-01
                year = data[0];
                month = data[1];
            }
            else if (data[1] > 31)
            {
                //01-99
                month = data[0];
                year = data[1];
            }
            else if (dayfirst && data[1] <= 12)
            {
                //13-01
                day = data[0];
                month = data[1];
            }
            else
            {
                //01-13
                month = data[0];
                day = data[1];
            }

        }
        else if (dataPosition == 3) //Three members
        {
            if (mstridx == 0)
            {
                month = data[0];
                day = data[1];
                year = data[2];
            }
            else if (mstridx == 1)
            {
                if (data[0] > 31 || (yearfirst && data[2] <= 31))
                {
                    //99-Jan-01
                    year = data[0];
                    month = data[1];
                    day = data[2];
                }
                else
                {
                    //01-Jan-01
                    //Give precedence to day-first, since
                    //two-digit years is usually hand-written.
                    day = data[0];
                    month = data[1];
                    year = data[2];
                }
            }
            else if (mstridx == 2)
            {
                if (data[1] > 31)
                {
                    //01-99-Jan
                    day = data[0];
                    year = data[1];
                    month = data[2];
                }
                else
                {
                    //99-01-Jan
                    year = data[0];
                    day = data[1];
                    month = data[2];
                }
            }
            else
            {
                if (data[0] > 31 || probableYearIndex(tokens) == 0
                        || (yearfirst && data[1] <= 12 && data[2] <= 31))
                {
                    //99-01-01
                    year = data[0];
                    month = data[1];
                    day = data[2];
                }
                else if (data[0] > 12 || (dayfirst && data[1] <= 12))
                {
                    //13-01-01
                    day = data[0];
                    month = data[1];
                    year = data[2];
                }
                else
                {
                    //01-13-01
                    month = data[0];
                    day = data[1];
                    year = data[2];
                }
            }
        }

        return tuple(year, month, day);
    }
}
--------------------------------------------------------------------------------