├── .gitignore ├── LICENSE ├── README.md ├── padaos.py ├── setup.py └── test_padaos.py /.gitignore: -------------------------------------------------------------------------------- 1 | .pytest_cache/ 2 | .idea/ 3 | *.pyc 4 | *.egg-info/ 5 | dist/ 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Matthew D. Scholefield 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Padaos 2 | 3 | *A rigid, lightweight, dead-simple intent parser* 4 | 5 | To deploy an intent parser into production with an automated feedback loop, 6 | it's essential that the new data doesn't interfere with the old data. 
It's 7 | also necessary that the parser can learn from sentences that closely match 8 | written English (or any other language). That's what Padaos does. 9 | 10 | ## Example 11 | 12 | ```python 13 | from padaos import IntentContainer 14 | 15 | container = IntentContainer() 16 | container.add_intent('hello', [ 17 | 'hello', 'hi', 'how are you', "what's up" 18 | ]) 19 | container.add_intent('buy', [ 20 | 'buy {item}', 'purchase {item}', 'get {item}', 'get {item} for me' 21 | ]) 22 | container.add_intent('search', [ 23 | 'search for {query} on {engine}', 'using {engine} (search|look) for {query}', 24 | 'find {query} (with|using) {engine}' 25 | ]) 26 | container.add_entity('engine', ['abc', 'xyz']) 27 | container.calc_intent('find cats using xyz') 28 | # {'name': 'search', 'entities': {'query': 'cats', 'engine': 'xyz'}} 29 | ``` 30 | 31 | ## How it works 32 | 33 | Padaos converts a series of example sentences into 34 | one big chunk of regex. Each intent is a single compiled regex matcher. 35 | Here are a few examples of an input example and the resulting output regex. 36 | 37 | ``` 38 | This is a test 39 | -> 40 | \W*This\W+is\W+a\W+test\W* 41 | ``` 42 | 43 | ``` 44 | Eat an (apple|orange). 45 | -> 46 | \W*Eat\W+an\W*(\W*apple\W*|\W*orange\W*)\.?\W* 47 | ``` 48 | 49 | ``` 50 | Hello! 51 | Hi! 
52 | -> 53 | (\W*Hello\W*\!?\W*|\W*Hi\W*\!?\W*) 54 | ``` 55 | 56 | ``` 57 | This is something (inside parentheses) 58 | -> 59 | (\W*This\W+is\W+something\W*\(?\W*inside\W+parentheses\W*\)?\W*) 60 | ``` 61 | -------------------------------------------------------------------------------- /padaos.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sre_constants 3 | import logging 4 | from threading import Lock 5 | 6 | 7 | LOG = logging.getLogger('padaos') 8 | 9 | class IntentContainer: 10 | def __init__(self): 11 | self.intent_lines, self.entity_lines = {}, {} 12 | self.intents, self.entities = {}, {} 13 | self.must_compile = True 14 | self.i = 0 15 | self.compile_lock = Lock() 16 | 17 | def add_intent(self, name, lines): 18 | with self.compile_lock: 19 | self.must_compile = True 20 | self.intent_lines[name] = lines 21 | 22 | def remove_intent(self, name): 23 | with self.compile_lock: 24 | self.must_compile = True 25 | if name in self.intent_lines: 26 | del self.intent_lines[name] 27 | 28 | def add_entity(self, name, lines): 29 | with self.compile_lock: 30 | self.must_compile = True 31 | self.entity_lines[name] = lines 32 | 33 | def remove_entity(self, name): 34 | with self.compile_lock: 35 | self.must_compile = True 36 | if name in self.entity_lines: 37 | del self.entity_lines[name] 38 | 39 | def _create_pattern(self, line): 40 | for pat, rep in ( 41 | # === Preserve Plain Parentheses === 42 | (r'\(([^\|)]*)\)', r'{~(\1)~}'), # (hi) -> {~(hi)~} 43 | 44 | # === Convert to regex literal === 45 | (r'(\W)', r'\\\1'), 46 | (r' {} '.format, None), # 'abc' -> ' abc ' 47 | 48 | # === Unescape Chars for Convenience === 49 | (r'\\ ', r' '), # "\ " -> " " 50 | (r'\\{', r'{'), # \{ -> { 51 | (r'\\}', r'}'), # \} -> } 52 | (r'\\#', r'#'), # \# -> # 53 | 54 | # === Support Parentheses Expansion === 55 | (r'(? 
( ignoring \{\~\( 56 | (r'\\\)(?!\\~\\})', r')'), # \) -> ) ignoring \)\~\} 57 | (r'\\{\\~\\\(', r'\\('), # \{\~\( -> \( 58 | (r'\\\)\\~\\}', r'\\)'), # \)\~\} -> \) 59 | (r'\\\|', r'|'), # \| -> | 60 | 61 | # === Support Special Symbols === 62 | (r'(?<=\s)\\:0(?=\s)', r'\\w+'), 63 | (r'#', r'\\d'), 64 | (r'\d', r'\\d'), 65 | 66 | # === Space Word Separations === 67 | (r'(? a :b 68 | (r'([^\\\w\s{])(\w)', r'\1 \2'), # a :b -> a : b 69 | 70 | # === Make Symbols Optional === 71 | (r'(\\[^\w ])', r'\1?'), 72 | 73 | # === Force 1+ Space Between Words === 74 | (r'(?<=(\w|\}))(\\\s|\s)+(?=\S)', r'\\W+'), 75 | 76 | # === Force 0+ Space Between Everything Else === 77 | (r'\s+', r'\\W*'), 78 | ): 79 | if callable(pat): 80 | line = pat(line) 81 | else: 82 | line = re.sub(pat, rep, line) 83 | return line 84 | 85 | def _create_intent_pattern(self, line, intent_name): 86 | namespace = intent_name.split(':')[0] + ':' 87 | line = self._create_pattern(line) 88 | replacements = {} 89 | for ent_name in set(re.findall(r'{([a-z_:]+)}', line)): 90 | replacements[ent_name] = r'(?P<{}__{{}}>.*?\w.*?)'.format(ent_name) 91 | for ent_name, ent in self.entities.items(): 92 | ent_regex = r'(?P<{}__{{}}>{})' 93 | if ent_name.startswith(namespace): 94 | replacements[ent_name[len(namespace):]] = ent_regex.format( 95 | ent_name[len(namespace):], ent 96 | ) 97 | else: 98 | replacements[ent_name] = ent_regex.format(ent_name.replace(':', '__colon__'), ent) 99 | for key, value in replacements.items(): 100 | line = line.replace('{' + key + '}', value.format(self.i), 1) 101 | self.i += 1 102 | return '^{}$'.format(line) 103 | 104 | def _create_regex(self, line, intent_name): 105 | """ Create regex and return. If error occurs returns None. 
""" 106 | try: 107 | return re.compile(self._create_intent_pattern(line, intent_name), 108 | re.IGNORECASE) 109 | except sre_constants.error as e: 110 | LOG.warning('Failed to parse the line "{}" ' 111 | 'for {}'.format(line, intent_name)) 112 | return None 113 | 114 | def create_regexes(self, lines, intent_name): 115 | regexes = [self._create_regex(line, intent_name) 116 | for line in sorted(lines, key=len, reverse=True) 117 | if line.strip()] 118 | # Filter out all regexes that fails 119 | return [r for r in regexes if r is not None] 120 | 121 | def compile(self): 122 | with self.compile_lock: 123 | self._compile() 124 | 125 | def _compile(self): 126 | self.entities = { 127 | ent_name: r'({})'.format('|'.join( 128 | self._create_pattern(line) for line in lines if line.strip() 129 | )) 130 | for ent_name, lines in self.entity_lines.items() 131 | } 132 | self.intents = { 133 | intent_name: self.create_regexes(lines, intent_name) 134 | for intent_name, lines in self.intent_lines.items() 135 | } 136 | self.must_compile = False 137 | 138 | def _calc_entities(self, query, regexes): 139 | for regex in regexes: 140 | match = regex.match(query) 141 | if match: 142 | yield { 143 | k.rsplit('__', 1)[0].replace('__colon__', ':'): v.strip() 144 | for k, v in match.groupdict().items() if v 145 | } 146 | 147 | def calc_intents(self, query): 148 | query = ' ' + query + ' ' 149 | if self.must_compile: 150 | self.compile() 151 | for intent_name, regexes in self.intents.items(): 152 | entities = list(self._calc_entities(query, regexes)) 153 | if entities: 154 | yield { 155 | 'name': intent_name, 156 | 'entities': min(entities, key=lambda x: sum(map(len, x.values()))) 157 | } 158 | 159 | def calc_intent(self, query): 160 | return min( 161 | self.calc_intents(query), 162 | key=lambda x: sum(map(len, x['entities'].values())), 163 | default={'name': None, 'entities': {}} 164 | ) 165 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from setuptools import setup 4 | 5 | setup( 6 | name='padaos', 7 | version='0.1.10', 8 | description='A rigid, lightweight, dead-simple intent parser', 9 | url='http://github.com/MatthewScholefield/padaos', 10 | author='Matthew Scholefield', 11 | author_email='matthew331199@gmail.com', 12 | license='MIT', 13 | py_modules=[ 14 | 'padaos' 15 | ], 16 | zip_safe=True 17 | ) 18 | 19 | -------------------------------------------------------------------------------- /test_padaos.py: -------------------------------------------------------------------------------- 1 | from padaos import IntentContainer 2 | 3 | 4 | class TestIntentContainer: 5 | def setup(self): 6 | self.container = IntentContainer() 7 | 8 | def test(self): 9 | self.container.add_intent('hello', [ 10 | 'hello', 'hi', 'how are you', "what's up" 11 | ]) 12 | self.container.add_intent('buy', [ 13 | 'buy {item}', 'purchase {item}', 'get {item}', 'get {item} for me' 14 | ]) 15 | self.container.add_entity('item', [ 16 | 'milk', 'cheese' 17 | ]) 18 | self.container.add_intent('drive', [ 19 | 'drive me to {place}', 'take me to {place}', 'navigate to {place}' 20 | ]) 21 | self.container.add_intent('eat', [ 22 | 'eat {fruit}', 'eat some {fruit}', 'munch on (some|) {fruit}' 23 | ]) 24 | self.container.compile() 25 | assert self.container.calc_intent('hello')['name'] == 'hello' 26 | assert not self.container.calc_intent('bye')['name'] 27 | assert self.container.calc_intent('buy milk') == { 28 | 'name': 'buy', 'entities': {'item': 'milk'} 29 | } 30 | assert self.container.calc_intent('eat some bananas') == { 31 | 'name': 'eat', 'entities': {'fruit': 'bananas'} 32 | } 33 | 34 | def test_case(self): 35 | self.container.add_intent('test', ['Testing cAPitalizAtion']) 36 | assert self.container.calc_intent('teStiNg CapitalIzation')['name'] == 'test' 37 | 38 | def test_punctuation(self): 39 | 
self.container.add_intent('test', ['Test! Of: Punctuation']) 40 | assert self.container.calc_intent('test of !punctuation...')['name'] == 'test' 41 | 42 | def test_spaces(self): 43 | self.container.add_intent('test', ['this is a test']) 44 | assert self.container.calc_intent('thisisatest')['name'] is None 45 | self.container.add_intent('test2', ['this has(one|two)options']) 46 | assert self.container.calc_intent('this has two options')['name'] == 'test2' 47 | assert self.container.calc_intent('th is is a test')['name'] is None 48 | 49 | self.container.add_intent('test3', ['I see {thing} (in|on) {place}']) 50 | assert self.container.calc_intent('I see a bin test')['name'] is None 51 | assert self.container.calc_intent('I see a bin in there') == { 52 | 'name': 'test3', 'entities': {'thing': 'a bin', 'place': 'there'} 53 | } 54 | --------------------------------------------------------------------------------