├── Dockerfile ├── LICENSE.md ├── README.md ├── pyduckling ├── DucklingFFI.hs ├── DucklingFFI_stub.h ├── __init__.py ├── pyduckling.c ├── pyduckling.i ├── pyduckling_stub.h └── stack.yaml └── test.py /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.5 2 | 3 | ENV PATH=/root/.local/bin:$PATH 4 | 5 | RUN apt-get update -y && apt-get install -y swig 6 | RUN curl -sSL https://get.haskellstack.org/ | sh 7 | RUN stack setup 8 | COPY . /pyduckling 9 | WORKDIR /pyduckling/pyduckling 10 | RUN stack build 11 | RUN stack ghc -- -c -dynamic -fPIC DucklingFFI.hs 12 | RUN swig -python pyduckling.i 13 | RUN gcc -fpic -c pyduckling.c pyduckling_wrap.c `python3.5-config --includes` -I`stack ghc -- --print-libdir`/include 14 | RUN stack ghc --package duckling -- -o _pyduckling.so -shared -dynamic -fPIC pyduckling.o pyduckling_wrap.o DucklingFFI.o -lHSrts-ghc8.0.2 15 | WORKDIR /pyduckling 16 | RUN pip install pytest python-dateutil 17 | 18 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # BSD 3-Clause License 2 | 3 | Copyright 2017 and following, Sebastian Mika 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 10 | 11 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyduckling - a native python interface to the haskell version of duckling 2 | 3 | Duckling by wit.ai/Facebook (https://github.com/facebookincubator/duckling) is one of the best libraries to find and parse time expressions. 4 | 5 | This package contains a native python wrapper to use duckling directly from within python. It basically provides a wrapper for the Haskell FFI (Foreign Function Interface). 6 | 7 | **Note**: This code is experimental. Contributions are welcome, but I cannot guarantee to integrate stuff quickly. But please feel free to use it as an inspiration for your own work. 8 | 9 | 10 | # Usage 11 | 12 | The wrapper currently only supports the Time dimension, but should be easily extendable to other extensions (see `DucklingFFI.hs`). I only tested it within Docker containers. There the process is as follows: 13 | 14 | * in the project root build the image `docker build -t pyduckling .` 15 | 16 | * start a shell in the container `docker run -it pyduckling bash` 17 | 18 | * start python 19 | 20 | * run: 21 | 22 | 23 | ``` 24 | from datetime import datetime 25 | from pyduckling import parse_time 26 | 27 | now = int(1000 * datetime.timestamp(datetime.utcnow())) 28 | 29 | parse_time('next week', 'EN', now) 30 | parse_time('nächsten Montag', 'DE', now) 31 | ``` 32 | 33 | -------------------------------------------------------------------------------- /pyduckling/DucklingFFI.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ForeignFunctionInterface #-} 2 | module Pyduckling where 3 | 4 | import Foreign.C.Types 5 | import Foreign.C.String 6 | import Data.String 7 | -- import Control.Monad.IO.Class 8 | import qualified Data.Text as Text 9 | -- import qualified Data.Text.Encoding as Text 10 | import Data.Time 11 | import Data.Time.Clock.POSIX 12 | import Data.Time.LocalTime.TimeZone.Series 13 | import Data.Maybe 14 | import Text.Read (readMaybe) 15 | 16 | import Duckling.Core 17 | import Duckling.Types 18 | import Duckling.Resolve 19 | 20 | -- | Builds a `DucklingTime` for timezone `tz` at `utcTime`. 21 | -- If no `series` found for `tz`, uses UTC. 22 | _makeReftime :: TimeZoneSeries -> UTCTime -> DucklingTime 23 | _makeReftime tzs utcTime = DucklingTime $ ZoneSeriesTime utcTime tzs 24 | 25 | millisToUTC :: Integer -> UTCTime 26 | millisToUTC t = posixSecondsToUTCTime $ (fromInteger t) / 1000 27 | 28 | foreign export ccall hs_parse :: CString -> CString -> CLong -> IO CString 29 | hs_parse :: CString -> CString -> CLong -> IO CString 30 | hs_parse text lang time = do 31 | hs_text <- peekCString text 32 | hs_lang <- peekCString lang 33 | utcNow <- getCurrentTime 34 | case hs_text of 35 | [] -> do 36 | newCString "need some text" 37 | _ -> do 38 | refTime <- return $ _makeReftime (TimeZoneSeries utc []) (millisToUTC $ fromIntegral time) 39 | let 40 | context = Context 41 | { referenceTime = refTime 42 | , lang = parseLang hs_lang 43 | } 44 | parsedResult = parse (Text.pack hs_text) context [This Time] 45 | newCString $ Text.unpack $ toJText parsedResult 46 | where 47 | utcNow = getCurrentTime 48 | defaultLang = EN 49 | 50 | parseLang :: String -> Lang 51 | parseLang l = fromMaybe defaultLang $ readMaybe (Text.unpack $ Text.toUpper $ Text.pack l) 52 | 53 | 54 | hs_parse_old :: CString -> CString -> IO CString 55 | hs_parse_old text lang = do 56 | hs_parse text lang 0 57 | -------------------------------------------------------------------------------- /pyduckling/DucklingFFI_stub.h: -------------------------------------------------------------------------------- 1 | #include "HsFFI.h" 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | extern HsPtr hs_parse(HsPtr a1, HsPtr a2); 6 | #ifdef __cplusplus 7 | } 8 | #endif 9 | 10 | -------------------------------------------------------------------------------- /pyduckling/__init__.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | from . pyduckling import py_init, py_exit, parse 3 | import json 4 | from datetime import datetime 5 | from dateutil import parser 6 | 7 | atexit.register(py_exit) 8 | py_init([]) 9 | 10 | 11 | def parse_time(text, lang='EN', date=0): 12 | """parse a text using duckling and return the Time dimension as 13 | json 14 | 15 | :param text string: The text to be parsed 16 | :param lang string: The language of the text as two letter code 17 | (see duckling) 18 | :param date int: The current time in milliseconds since the epoch 19 | (UTC) (e.g. int(1000 * datetime.timestamp(datetime.utcnow()))) 20 | 21 | :returns: The duckling json representation of all found time 22 | expressions. 23 | """ 24 | try: 25 | type(date) != int 26 | date = int(1000 * datetime.timestamp(parser.parse(date))) 27 | return json.loads(parse(text, lang, date)) 28 | except: 29 | return json.loads(parse(text, lang, date)) 30 | -------------------------------------------------------------------------------- /pyduckling/pyduckling.c: -------------------------------------------------------------------------------- 1 | #include "pyduckling_stub.h" 2 | 3 | void py_init(int argc, char *argv[]) { 4 | hs_init(&argc, &argv); 5 | } 6 | 7 | void py_exit() { 8 | hs_exit(); 9 | } 10 | 11 | char* parse(char* text, char* lang, long time) { 12 | return hs_parse(text, lang, time); 13 | } 14 | -------------------------------------------------------------------------------- /pyduckling/pyduckling.i: -------------------------------------------------------------------------------- 1 | %module pyduckling 2 | %{ 3 | 4 | /* Put header files here or function declarations like below */ 5 | extern char* parse(char* text, char* lang, long time); 6 | extern void py_init(int argc, char** argv); 7 | extern void py_exit(); 8 | %} 9 | 10 | %typemap(in) (int argc, char **argv) { 11 | /* Check if is a list */ 12 | if (PyList_Check($input)) { 13 | int i; 14 | $1 = PyList_Size($input); 15 | $2 = (char **) malloc(($1+1)*sizeof(char *)); 16 | for (i = 0; i < $1; i++) { 17 | PyObject *o = PyList_GetItem($input,i); 18 | if (PyString_Check(o)) 19 | $2[i] = PyString_AsString(PyList_GetItem($input,i)); 20 | else { 21 | PyErr_SetString(PyExc_TypeError,"list must contain strings"); 22 | free($2); 23 | return NULL; 24 | } 25 | } 26 | $2[i] = 0; 27 | } else { 28 | PyErr_SetString(PyExc_TypeError,"not a list"); 29 | return NULL; 30 | } 31 | } 32 | 33 | %typemap(freearg) (int argc, char **argv) { 34 | free((char *) $2); 35 | } 36 | 37 | extern char* parse(char* text, char* lang, long time); 38 | extern void py_init(int argc, char** argv); 39 | extern void py_exit(); 40 | -------------------------------------------------------------------------------- /pyduckling/pyduckling_stub.h: -------------------------------------------------------------------------------- 1 | #include "HsFFI.h" 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | extern HsPtr hs_parse(HsPtr text, HsPtr lang, long int time); 6 | #ifdef __cplusplus 7 | } 8 | #endif 9 | 10 | -------------------------------------------------------------------------------- /pyduckling/stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-8.8 2 | 3 | packages: 4 | - location: 5 | git: https://github.com/facebookincubator/duckling.git 6 | commit: 3ec2228eac0d515db38db3dea906f69be0b3e59e 7 | 8 | extra-deps: [] 9 | 10 | flags: {} 11 | 12 | extra-package-dbs: [] 13 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from datetime import time, date, timedelta, datetime 3 | from dateutil import parser 4 | from pyduckling import parse_time 5 | 6 | 7 | @pytest.fixture 8 | def current_time(): 9 | return int(1000 * datetime.timestamp(datetime.utcnow())) 10 | 11 | 12 | def test_parse_time(): 13 | result = parse_time( 14 | u'Let\'s meet at 11:45am') 15 | assert len(result) == 1 16 | assert time(11, 45) == parser.parse(result[0][u'value'][u'value']).time() 17 | 18 | 19 | def test_parse_time_with_reference_date(): 20 | result = parse_time( 21 | u'Let\'s meet tomorrow', date=u'1990-12-30') 22 | assert len(result) == 1 23 | assert parser.parse(u'1990-12-30').date() + \ 24 | timedelta(days=1) == parser.parse(result[0][u'value'][u'value']).date() 25 | 26 | def test_parse_time_with_reference_date_and_time(): 27 | result = parse_time( 28 | u'Let\'s meet tomorrow at 5pm', date=u'1990-12-30T08:26:07.470413') 29 | assert len(result) == 1 30 | assert parser.parse(u'1990-12-30').date() + \ 31 | timedelta(days=1) == parser.parse(result[0][u'value'][u'value']).date() 32 | assert time(17, 00) == parser.parse(result[0][u'value'][u'value']).time() 33 | 34 | def test_parse_time_with_reference_date_and_time_2(): 35 | result = parse_time( 36 | u'Let\'s meet tomorrow at 17h', date=u'1990-12-30T08:26:07') 37 | assert len(result) == 1 38 | assert parser.parse(u'1990-12-30').date() + \ 39 | timedelta(days=1) == parser.parse(result[0][u'value'][u'value']).date() 40 | assert time(17, 00) == parser.parse(result[0][u'value'][u'value']).time() 41 | 42 | def test_parse_time_with_reference_date_and_time_3(): 43 | result = parse_time( 44 | u'Let\'s meet tomorrow at 17:45', date=u'1990-12-30T08:26:07') 45 | assert len(result) == 1 46 | assert parser.parse(u'1990-12-30').date() + \ 47 | timedelta(days=1) == parser.parse(result[0][u'value'][u'value']).date() 48 | assert time(17, 45) == parser.parse(result[0][u'value'][u'value']).time() 49 | 50 | def test_parse_time_with_reference_date_and_time_4(): 51 | result = parse_time( 52 | u'Let\'s meet in one week', date=u'1990-12-30T08:26:07') 53 | assert len(result) == 1 54 | assert parser.parse(u'1990-12-30').date() + \ 55 | timedelta(days=7) == parser.parse(result[0][u'value'][u'value']).date() 56 | 57 | def test_parse_time_with_reference_date_and_time_5(): 58 | result = parse_time( 59 | u'Let\'s meet in 2h', date=u'1990-12-30T08:26:07') 60 | assert len(result) == 1 61 | assert parser.parse(u'1990-12-30').date() + \ 62 | timedelta(days=0) == parser.parse(result[0][u'value'][u'value']).date() 63 | assert time(10, 26) == parser.parse(result[0][u'value'][u'value']).time() 64 | 65 | def test_parse_multiple_times(): 66 | result = parse_time( 67 | u'Let\'s meet at 11:45am or tomorrow', date=current_time()) 68 | assert len(result) == 2 69 | assert time(11, 45) == parser.parse(result[0][u'value'][u'value']).time() 70 | assert date.today() + \ 71 | timedelta(days=1) == parser.parse(result[1][u'value'][u'value']).date() 72 | 73 | def test_parse_multiple_times_2(): 74 | result = parse_time( 75 | u'Let\'s meet at 11:45am, or tomorrow at 11am or in one week', date=current_time()) 76 | assert len(result) == 3 77 | assert time(11, 45) == parser.parse(result[0][u'value'][u'value']).time() 78 | assert date.today() + \ 79 | timedelta(days=1) == parser.parse(result[1][u'value'][u'value']).date() 80 | assert time(11, 00) == parser.parse(result[1][u'value'][u'value']).time() 81 | assert date.today() + \ 82 | timedelta(days=7) == parser.parse(result[2][u'value'][u'value']).date() 83 | 84 | 85 | --------------------------------------------------------------------------------