├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── Vagrantfile ├── cahoots ├── __init__.py ├── confidence │ ├── __init__.py │ ├── normalizer.py │ └── normalizers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── character.py │ │ ├── date.py │ │ ├── equation.py │ │ ├── number.py │ │ └── phone.py ├── config.py ├── data.py ├── data │ └── prepositions.yaml ├── parser.py ├── parsers │ ├── __init__.py │ ├── base.py │ ├── boolean.py │ ├── character.py │ ├── date.py │ ├── email.py │ ├── equation.py │ ├── location │ │ ├── __init__.py │ │ ├── address.py │ │ ├── coordinate.py │ │ ├── data │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── city.sql │ │ │ ├── city.txt.bz2 │ │ │ ├── country.csv.bz2 │ │ │ ├── country.sql │ │ │ ├── landmark.csv.bz2 │ │ │ ├── landmark.sql │ │ │ ├── location.sql │ │ │ ├── location.sqlite.dist │ │ │ ├── street_suffix.csv.bz2 │ │ │ └── street_suffix.sql │ │ ├── landmark.py │ │ └── postalcode.py │ ├── measurement │ │ ├── __init__.py │ │ └── units │ │ │ ├── imperial_area.yaml │ │ │ ├── imperial_length.yaml │ │ │ ├── imperial_mass.yaml │ │ │ ├── imperial_temperature.yaml │ │ │ ├── imperial_volume.yaml │ │ │ ├── metric_area.yaml │ │ │ ├── metric_length.yaml │ │ │ ├── metric_mass.yaml │ │ │ ├── metric_temperature.yaml │ │ │ ├── metric_volume.yaml │ │ │ └── misc_length.yaml │ ├── name.py │ ├── number.py │ ├── phone.py │ ├── programming │ │ ├── LICENSES │ │ │ ├── LICENSES_ACTIONSCRIPT │ │ │ ├── LICENSES_C │ │ │ ├── LICENSES_CPP │ │ │ ├── LICENSES_CS │ │ │ ├── LICENSES_JAVA │ │ │ ├── LICENSES_JAVASCRIPT │ │ │ ├── LICENSES_PERL │ │ │ ├── LICENSES_PHP │ │ │ ├── LICENSES_PYTHON │ │ │ ├── LICENSES_RUBY │ │ │ └── LICENSES_VB │ │ ├── __init__.py │ │ ├── bayesian.py │ │ ├── languages │ │ │ ├── actionscript.yaml │ │ │ ├── c.yaml │ │ │ ├── cpp.yaml │ │ │ ├── cs.yaml │ │ │ ├── java.yaml │ │ │ ├── javascript.yaml │ │ │ ├── perl.yaml │ │ │ ├── php.yaml │ │ │ ├── python.yaml │ │ │ ├── ruby.yaml │ │ │ └── vb.yaml │ │ ├── lexer.py │ │ └── trainers.zip │ 
└── uri.py ├── result.py └── util.py ├── cahootserver ├── __init__.py ├── config.py ├── out.py ├── server.py ├── static │ ├── cahoots.css │ ├── cahoots.js │ ├── dark_leather.png │ └── favicon.ico └── templates │ ├── home.html │ └── master.html ├── docs ├── Makefile ├── cahoots.confidence.normalizer.rst ├── cahoots.confidence.normalizers.base.rst ├── cahoots.confidence.normalizers.character.rst ├── cahoots.confidence.normalizers.date.rst ├── cahoots.confidence.normalizers.equation.rst ├── cahoots.confidence.normalizers.number.rst ├── cahoots.confidence.normalizers.phone.rst ├── cahoots.confidence.normalizers.rst ├── cahoots.confidence.rst ├── cahoots.config.rst ├── cahoots.data.rst ├── cahoots.parser.rst ├── cahoots.parsers.base.rst ├── cahoots.parsers.boolean.rst ├── cahoots.parsers.character.rst ├── cahoots.parsers.date.rst ├── cahoots.parsers.email.rst ├── cahoots.parsers.equation.rst ├── cahoots.parsers.location.address.rst ├── cahoots.parsers.location.coordinate.rst ├── cahoots.parsers.location.landmark.rst ├── cahoots.parsers.location.postalcode.rst ├── cahoots.parsers.location.rst ├── cahoots.parsers.measurement.rst ├── cahoots.parsers.name.rst ├── cahoots.parsers.number.rst ├── cahoots.parsers.phone.rst ├── cahoots.parsers.programming.bayesian.rst ├── cahoots.parsers.programming.lexer.rst ├── cahoots.parsers.programming.rst ├── cahoots.parsers.rst ├── cahoots.parsers.uri.rst ├── cahoots.result.rst ├── cahoots.rst ├── cahoots.util.rst ├── conf.py ├── index.rst └── make.bat ├── pylintrc ├── setup.py ├── setup ├── bashrc ├── dev_provision.sh ├── requirements.dev.txt ├── requirements.system.txt ├── requirements.txt ├── server_provision.sh └── vagrant_provision.sh └── tests ├── __init__.py ├── build.sh ├── confidence ├── __init__.py ├── normalizer.py └── normalizers │ ├── __init__.py │ ├── base.py │ ├── character.py │ ├── date.py │ ├── equation.py │ ├── number.py │ └── phone.py ├── config.py ├── data.py ├── parser.py ├── parsers ├── __init__.py ├── base.py ├── 
boolean.py ├── character.py ├── date.py ├── email.py ├── equation.py ├── location │ ├── __init__.py │ ├── address.py │ ├── coordinate.py │ ├── landmark.py │ └── postalcode.py ├── measurement.py ├── name.py ├── number.py ├── phone.py ├── programming │ ├── __init__.py │ ├── bayesian.py │ └── lexer.py └── uri.py ├── test.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .vagrant 3 | .env 4 | .idea 5 | build 6 | .cover* 7 | simplebayes 8 | Cahoots.egg-info/ 9 | dist/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | cache: 3 | - apt 4 | - pip 5 | sudo: false 6 | python: 7 | - "2.7" 8 | - "3.4" 9 | install: 10 | - pip install -r setup/requirements.txt 11 | - pip install -r setup/requirements.dev.txt 12 | script: 13 | - nosetests tests/test.py --with-coverage --cover-package=cahoots --cover-min-percentage 100 14 | - flake8 cahoots tests cahootserver 15 | - pylint cahoots tests cahootserver 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2012-2015 Serenity Software, LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE 3 | include CHANGELOG -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | def Kernel.is_windows? 2 | processor, platform, *rest = RUBY_PLATFORM.split("-") 3 | platform == 'mingw32' 4 | end 5 | 6 | Vagrant.configure("2") do |config| 7 | 8 | config.vm.box = "pjcolp/trusty64" 9 | config.vm.network :public_network, :bridge => ENV['VAGRANT_BRIDGE'] 10 | config.vm.network :forwarded_port, guest: 8000, host: 8000 11 | config.vm.synced_folder ".", "/vagrant", type: "nfs" 12 | config.vm.provision :shell, :path => "setup/vagrant_provision.sh" 13 | config.ssh.username = "vagrant" 14 | config.ssh.shell = "bash -l" 15 | config.ssh.keep_alive = true 16 | config.ssh.forward_agent = true 17 | config.ssh.forward_x11 = true 18 | config.vagrant.host = :detect 19 | 20 | config.vm.provider :virtualbox do |virtualbox, override| 21 | virtualbox.customize ["modifyvm", :id, "--name", "cahoots"] 22 | virtualbox.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] 23 | virtualbox.customize ["modifyvm", :id, "--natdnsproxy1", "on"] 24 | virtualbox.customize ["modifyvm", :id, "--memory", 2048] 25 | end 26 | 27 | config.vm.provider "hyperv" do |hv| 28 | 
hv.memory = 2048 29 | hv.vmname = 'cahoots' 30 | hv.cpus = 2 31 | hv.ip_address_timeout = 300 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /cahoots/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | -------------------------------------------------------------------------------- /cahoots/confidence/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
class HierarchicalNormalizerChain(object):
    """
    Applies every enabled confidence normalizer whose test() accepts the
    result types present, then drops results left without any confidence.
    """

    def __init__(self, config, types, all_types):
        """
        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        :param types: list of result types
        :type types: list
        :param all_types: list of result types + subtypes
        :type all_types: list
        """
        self.config = config
        self.types = types
        self.all_types = all_types

    def normalize(self, results):
        """
        Passes the result set through each applicable normalizer in turn

        :param results: list of the parse result objects
        :type results: list
        :return: the normalized results with a confidence above zero
        :rtype: list
        """
        # Applicability is decided for every normalizer up front, before
        # any of them is given the chance to alter the results.
        applicable = [
            candidate
            for candidate in self.config.enabled_confidence_normalizers
            if candidate.test(self.types, self.all_types)
        ]

        for normalizer in applicable:
            results = normalizer.normalize(results)

        # only results that still have a confidence greater than 0 survive
        return [entry for entry in results if entry.confidence > 0]
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | -------------------------------------------------------------------------------- /cahoots/confidence/normalizers/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
class BaseNormalizer(object):
    """Contract that every confidence normalizer class has to implement"""

    @staticmethod
    def test(types, all_types):
        """
        Decides whether this normalizer applies to the given result set

        :param types: list of result types
        :type types: list
        :param all_types: list of result types + subtypes
        :type all_types: list
        :return: if this normalizer should normalize this result set
        :rtype: bool
        :raises NotImplementedError: always, until overridden
        """
        message = "Class must override the test() method"
        raise NotImplementedError(message)

    @staticmethod
    def normalize(results):
        """
        Adjusts the confidence of the results it is responsible for

        :param results: list of results we want to normalize
        :type results: list
        :return: the normalized results
        :rtype: list
        :raises NotImplementedError: always, until overridden
        """
        message = "Class must override the normalize() method"
        raise NotImplementedError(message)
from cahoots.confidence.normalizers.base import BaseNormalizer


class CharacterWithoutBoolean(BaseNormalizer):
    """Boosts Character confidence when no Boolean result competes with it"""

    @staticmethod
    def test(types, _):
        """
        Applies when a Character result exists and no Boolean result does

        :param types: list of result types
        :type types: list
        :param _: list of result types + subtypes (unused)
        :type _: list
        :return: if this normalizer should normalize this result set
        :rtype: bool
        """
        has_character = 'Character' in types
        has_boolean = 'Boolean' in types
        return has_character and not has_boolean

    @staticmethod
    def normalize(results):
        """
        Raises every Character result to a confidence of 100

        :param results: list of results we want to normalize
        :type results: list
        :return: the normalized results
        :rtype: list
        """
        for candidate in results:
            if candidate.type == 'Character':
                candidate.confidence = 100

        return results
from cahoots.confidence.normalizers.base import BaseNormalizer
from cahoots.result import ParseResult


class DateWithPostalCode(BaseNormalizer):
    """If we get a date and a postal code, the date gets higher confidence"""

    @staticmethod
    def test(types, _):
        """
        Applies when both a Date and a Postal Code result are present

        :param types: list of result types
        :type types: list
        :param _: list of result types + subtypes (unused)
        :type _: list
        :return: if this normalizer should normalize this result set
        :rtype: bool
        """
        has_date = 'Date' in types
        has_postal_code = 'Postal Code' in types
        return has_date and has_postal_code

    @staticmethod
    def normalize(results):
        """
        Sets the Date confidence to the Postal Code confidence plus 4,
        never going above 70.

        :param results: list of results we want to normalize
        :type results: list
        :return: the normalized results
        :rtype: list
        """
        date = None
        postal_code = None

        # When a type shows up more than once, the last occurrence wins.
        for candidate in results:
            if candidate.type == 'Date':
                date = candidate
            elif candidate.type == 'Postal Code':
                postal_code = candidate

        # Both results must have been found in the result set.
        assert isinstance(date, ParseResult)
        assert isinstance(postal_code, ParseResult)

        boosted = postal_code.confidence + 4
        date.confidence = boosted if boosted < 70 else 70

        return results
from cahoots.confidence.normalizers.base import BaseNormalizer


class EquationWithPhonePostalCode(BaseNormalizer):
    """Lowers Equation confidence when a phone/postal code is also found"""

    @staticmethod
    def test(types, _):
        """
        Applies when an Equation comes with a Postal Code or a Phone result

        :param types: list of result types
        :type types: list
        :param _: list of result types + subtypes (unused)
        :type _: list
        :return: if this normalizer should normalize this result set
        :rtype: bool
        """
        has_equation = 'Equation' in types
        has_competitor = 'Postal Code' in types or 'Phone' in types
        return has_equation and has_competitor

    @staticmethod
    def normalize(results):
        """
        Takes 15 confidence points away from every Equation result

        :param results: list of results we want to normalize
        :type results: list
        :return: the normalized results
        :rtype: list
        """
        for candidate in results:
            if candidate.type == 'Equation':
                candidate.confidence -= 15

        return results
from cahoots.confidence.normalizers.base import BaseNormalizer


class NumberWithNonNumbers(BaseNormalizer):
    """Normalizes All Number Results Where Non-Numbers Are Present"""

    @staticmethod
    def test(types, _):
        """
        Applies when the types mix Number with at least one other type

        :param types: list of result types
        :type types: list
        :param _: list of result types + subtypes (unused)
        :type _: list
        :return: if this normalizer should normalize this result set
        :rtype: bool
        """
        non_number_count = sum(1 for kind in types if kind != 'Number')
        # some, but not all, of the types are something other than Number
        return 0 < non_number_count < len(types)

    @staticmethod
    def normalize(results):
        """
        Cuts the confidence of every Number result in half (truncated to int)

        :param results: list of results we want to normalize
        :type results: list
        :return: the normalized results
        :rtype: list
        """
        for candidate in results:
            if candidate.type == 'Number':
                candidate.confidence = int(candidate.confidence * 0.5)

        return results


class IntOctWithPhoneDatePostalCode(BaseNormalizer):
    """Normalizes Int/Oct where Phone, Date, or Postal Code Are Present"""

    @staticmethod
    def test(_, types):
        """
        Applies when an Integer or Octal appears together with a
        Postal Code, Date or Phone.

        :param _: list of result types (unused)
        :type _: list
        :param types: list of result types + subtypes
        :type types: list
        :return: if this normalizer should normalize this result set
        :rtype: bool
        """
        present = set(types)
        has_target = bool(present & {'Integer', 'Octal'})
        has_trigger = bool(present & {'Postal Code', 'Date', 'Phone'})
        return has_target and has_trigger

    @staticmethod
    def normalize(results):
        """
        Drops Octal results to a confidence of 5 and Integer results to 10

        :param results: list of results we want to normalize
        :type results: list
        :return: the normalized results
        :rtype: list
        """
        for candidate in results:
            if candidate.subtype in ('Integer', 'Octal'):
                candidate.confidence = \
                    5 if candidate.subtype == 'Octal' else 10

        return results
from cahoots.confidence.normalizers.base import BaseNormalizer


class PhoneWithUri(BaseNormalizer):
    """Lowers Phone confidence when a URI result is present as well"""

    @staticmethod
    def test(types, _):
        """
        Applies when both a Phone and a URI result are present

        :param types: list of result types
        :type types: list
        :param _: list of result types + subtypes (unused)
        :type _: list
        :return: if this normalizer should normalize this result set
        :rtype: bool
        """
        has_phone = 'Phone' in types
        has_uri = 'URI' in types
        return has_phone and has_uri

    @staticmethod
    def normalize(results):
        """
        Takes 25 confidence points away from every Phone result

        :param results: list of results we want to normalize
        :type results: list
        :return: the normalized results
        :rtype: list
        """
        for candidate in results:
            if candidate.type == 'Phone':
                candidate.confidence -= 25

        return results
/cahoots/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
class BaseConfig(object):
    """
    Cahoots Configuration
    Change this file to suit your installation's needs.
    """

    # Are we in debug mode?
    debug = False

    # To disable a module, simply comment it out of this list.
    # Be aware that modules may have soft-dependencies on one another,
    # and a disabled module may impact results from other modules which
    # seek its "counsel." This may result in unexpected confidence
    # scores, a change in result determination, etc.
    enabled_modules = [
        AddressParser,
        BooleanParser,
        CharacterParser,
        CoordinateParser,
        DateParser,
        EmailParser,
        EquationParser,
        LandmarkParser,
        MeasurementParser,
        NameParser,
        NumberParser,
        PhoneParser,
        PostalCodeParser,
        ProgrammingParser,
        URIParser,
    ]

    # Confidence normalizers that are enabled for this installation.
    enabled_confidence_normalizers = [
        CharacterWithoutBoolean,
        DateWithPostalCode,
        EquationWithPhonePostalCode,
        IntOctWithPhoneDatePostalCode,
        NumberWithNonNumbers,
        PhoneWithUri,
    ]
class DataHandler(object):
    """Handles the lookup of pieces of data from the data/ directory"""

    def __init__(self):
        # The trailing slash is intentional: get_file_handle() builds the
        # full location by plain string concatenation.
        self.path = os.path.dirname(os.path.abspath(__file__)) + '/data/'

    def get_file_handle(self, filename):
        """
        Gets a requested file handle

        The handle is opened in text read mode; the caller is
        responsible for closing it.

        :param filename: The location of the file we want to load,
            relative to this handler's data directory
        :type filename: str
        :return: file handle
        :rtype: file
        """
        return open(self.path + filename, 'r')

    def get_prepositions(self):
        """
        returns the list of prepositions

        The parsed list is stored in the registry under
        'DATA_prepositions'; when that entry is already present it is
        returned without reading the file again.

        :return: list of prepositions
        :rtype: list
        """
        if registry.test('DATA_prepositions'):
            return registry.get('DATA_prepositions')

        # safe_load: yaml.load() without an explicit Loader is deprecated
        # and is a TypeError on PyYAML 6+. The with-block closes the
        # handle even when parsing raises.
        with self.get_file_handle('prepositions.yaml') as handle:
            prepositions = yaml.safe_load(handle)

        registry.set('DATA_prepositions', prepositions)
        return prepositions
to 58 | - toward 59 | - towards 60 | - under 61 | - underneath 62 | - unlike 63 | - until 64 | - up 65 | - upon 66 | - versus 67 | - via 68 | - with 69 | - within 70 | - without -------------------------------------------------------------------------------- /cahoots/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | -------------------------------------------------------------------------------- /cahoots/parsers/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
class BaseParser(object):
    '''Common foundation that every concrete parser extends'''

    # Class-level defaults; __init__ overwrites them per instance.
    config = None
    type = "Base"
    confidence = 10

    def __init__(self, config, p_type="Base", confidence=10):
        """
        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        :param p_type: parser type
        :type p_type: str
        :param confidence: result confidence default
        :type confidence: int
        """
        self.config, self.type, self.confidence = config, p_type, confidence

    @staticmethod
    def bootstrap(config):
        """
        Statically called hook for one-time parser setup

        The base implementation does nothing.

        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """

    def parse(self, data):
        """
        Base parse method; concrete parsers have to override it

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        :raises NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError("Class must override the parse() method")

    def result(self, subtype="Unknown", confidence=0, value=None, data=None):
        """
        Builds a ParseResult object detailing the results of parsing

        :param subtype: parse result subtype
        :type subtype: str
        :param confidence: how confident we are in this result (1-100);
            a value equal to 0 is replaced by this parser's default
        :type confidence: int
        :param value: representation of the parsed data
        :type value: mixed
        :param data: any additional data a parser wants to provide;
            None is replaced by a new empty dict
        :type data: mixed
        :return: parse result
        :rtype: ParseResult
        """
        effective_confidence = \
            self.confidence if confidence == 0 else confidence
        extra = {} if data is None else data

        return ParseResult(
            p_type=self.type,
            subtype=subtype,
            confidence=effective_confidence,
            value=value,
            additional_data=extra
        )
-------------------------------------------------------------------------------- /cahoots/parsers/boolean.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
class BooleanParser(BaseParser):
    '''Determines if given data is a boolean value'''

    # Words grouped by how strongly they signal a boolean value.
    strongTrue = ["true", "yes"]
    mediumTrue = ["yep", "yup"]
    weakTrue = ["1", "t", "one"]

    strongFalse = ["false", "no"]
    mediumFalse = ["nope"]
    weakFalse = ["0", "f", "zero"]

    def __init__(self, config):
        """
        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """
        BaseParser.__init__(self, config, "Boolean")

    @classmethod
    def is_true(cls, data):
        """
        Scores how strongly a value reads as true

        :param data: potential true value
        :type data: str
        :return: 100, 75 or 50 for a strong, medium or weak match;
            0 when the value is not a known true word
        :rtype: int
        """
        tiers = (
            (cls.strongTrue, 100),
            (cls.mediumTrue, 75),
            (cls.weakTrue, 50),
        )
        for words, score in tiers:
            if data in words:
                return score
        return 0

    @classmethod
    def is_false(cls, data):
        """
        Scores how strongly a value reads as false

        :param data: potential false value
        :type data: str
        :return: 100, 75 or 50 for a strong, medium or weak match;
            0 when the value is not a known false word
        :rtype: int
        """
        tiers = (
            (cls.strongFalse, 100),
            (cls.mediumFalse, 75),
            (cls.weakFalse, 50),
        )
        for words, score in tiers:
            if data in words:
                return score
        return 0

    def parse(self, data):
        """
        parses for booleans

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        lowered = data.lower()

        # The largest boolean "value" we have is 5 characters long.
        if len(lowered) > 5:
            return

        # True is tested (and yielded) before false.
        checks = (
            ("True", self.is_true, True),
            ("False", self.is_false, False),
        )
        for subtype, scorer, value in checks:
            confidence = scorer(lowered)
            if confidence:
                yield self.result(subtype, confidence, value)
class CharacterParser(BaseParser):
    '''Determines if given data is a character'''

    def __init__(self, config):
        """
        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """
        BaseParser.__init__(self, config, "Character", 25)

    @classmethod
    def is_letter(cls, data):
        """
        Checks if input is a letter

        This is a substring test against string.ascii_letters, so it is
        only meaningful for single-character input.

        :param data: data that might be a letter
        :type data: str
        :return: if it's a letter or not
        :rtype: bool
        """
        return data in string.ascii_letters

    @classmethod
    def is_punctuation(cls, data):
        """
        Checks if input is punctuation

        This is a substring test against string.punctuation, so it is
        only meaningful for single-character input.

        :param data: data that might be punctuation
        :type data: str
        :return: if it's punctuation or not
        :rtype: bool
        """
        return data in string.punctuation

    @classmethod
    def is_whitespace(cls, data):
        """
        Checks if input is whitespace

        This is a substring test against string.whitespace, so it is
        only meaningful for single-character input.

        :param data: data that might be whitespace
        :type data: str
        :return: if it's whitespace or not
        :rtype: bool
        """
        return data in string.whitespace

    def parse(self, data):
        """
        parses for characters

        Only input of exactly one character is considered; at most one
        result is yielded.

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        if len(data) != 1:
            return

        character_data = {'char-code': ord(data)}

        # Checked in this order; the first category that matches wins.
        categories = (
            ("Letter", self.is_letter),
            ("Punctuation", self.is_punctuation),
            ("Whitespace", self.is_whitespace),
        )
        for subtype, matches in categories:
            if matches(data):
                yield self.result(subtype, data=character_data)
                return
-------------------------------------------------------------------------------- /cahoots/parsers/email.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
class EmailParser(BaseParser):
    '''Determines if given data is an email address'''

    def __init__(self, config):
        """
        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """
        BaseParser.__init__(self, config, "Email", 100)

    @staticmethod
    def bootstrap(config):
        """
        Compiles the address pattern once and stores it in the registry

        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """
        registry.set('EP_valid_regex', re.compile(VALID_ADDRESS_REGEXP))

    def parse(self, data_string):
        """
        parses for email addresses

        Input longer than 254 characters, or without an '@', is
        rejected before the pattern stored by bootstrap() is consulted.

        :param data_string: the string we want to parse
        :type data_string: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        if len(data_string) <= 254 and '@' in data_string:
            pattern = registry.get('EP_valid_regex')
            if pattern.match(data_string):
                yield self.result("Email Address", self.confidence)
6 | This means you can use the dump as long as you give credit to geonames (a link on your website to www.geonames.org is ok) 7 | see http://creativecommons.org/licenses/by/3.0/ 8 | UK: Contains Royal Mail data Royal Mail copyright and database right 2010. 9 | The Data is provided "as is" without warranty or any representation of accuracy, timeliness or completeness. 10 | 11 | This readme describes the GeoNames Postal Code dataset. 12 | The main GeoNames gazetteer data extract is here: http://download.geonames.org/export/dump/ 13 | 14 | 15 | For many countries lat/lng are determined with an algorithm that searches the place names in the main geonames database 16 | using administrative divisions and numerical vicinity of the postal codes as factors in the disambiguation of place names. 17 | For postal codes and place name for which no corresponding toponym in the main geonames database could be found an average 18 | lat/lng of 'neighbouring' postal codes is calculated. 19 | Please let us know if you find any errors in the data set. Thanks 20 | 21 | For Canada we have only the first letters of the full postal codes (for copyright reasons) 22 | 23 | The Argentina data file contains 4-digit postal codes which were replaced with a new system in 1999. 24 | 25 | For Brazil only major postal codes are available (only the codes ending with -000 and the major code per municipality). 26 | 27 | For India the lat/lng accuracy is not yet comparable to other countries. 28 | 29 | The data format is tab-delimited text in utf8 encoding, with the following fields : 30 | 31 | country code : iso country code, 2 characters 32 | postal code : varchar(20) 33 | place name : varchar(180) 34 | admin name1 : 1. order subdivision (state) varchar(100) 35 | admin code1 : 1. order subdivision (state) varchar(20) 36 | admin name2 : 2. order subdivision (county/province) varchar(100) 37 | admin code2 : 2. order subdivision (county/province) varchar(20) 38 | admin name3 : 3. 
order subdivision (community) varchar(100) 39 | admin code3 : 3. order subdivision (community) varchar(20) 40 | latitude : estimated latitude (wgs84) 41 | longitude : estimated longitude (wgs84) 42 | accuracy : accuracy of lat/lng from 1=estimated to 6=centroid -------------------------------------------------------------------------------- /cahoots/parsers/location/data/city.sql: -------------------------------------------------------------------------------- 1 | .mode csv 2 | .separator "\t" 3 | .import cahoots/parsers/location/data/city.txt city -------------------------------------------------------------------------------- /cahoots/parsers/location/data/city.txt.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SerenitySoftware/cahoots/866336c51436343ff5e56f83f89dddc82a5693a3/cahoots/parsers/location/data/city.txt.bz2 -------------------------------------------------------------------------------- /cahoots/parsers/location/data/country.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SerenitySoftware/cahoots/866336c51436343ff5e56f83f89dddc82a5693a3/cahoots/parsers/location/data/country.csv.bz2 -------------------------------------------------------------------------------- /cahoots/parsers/location/data/country.sql: -------------------------------------------------------------------------------- 1 | .mode csv 2 | .import cahoots/parsers/location/data/country.csv country -------------------------------------------------------------------------------- /cahoots/parsers/location/data/landmark.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SerenitySoftware/cahoots/866336c51436343ff5e56f83f89dddc82a5693a3/cahoots/parsers/location/data/landmark.csv.bz2 -------------------------------------------------------------------------------- 
/cahoots/parsers/location/data/landmark.sql: -------------------------------------------------------------------------------- 1 | .mode csv 2 | .import cahoots/parsers/location/data/landmark.csv landmark -------------------------------------------------------------------------------- /cahoots/parsers/location/data/location.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `city` ( 2 | `country` TEXT collate nocase, 3 | `postal_code` TEXT collate nocase, 4 | `city` TEXT collate nocase, 5 | `state1` TEXT collate nocase, 6 | `state2` TEXT collate nocase, 7 | `province1` TEXT, 8 | `province2` TEXT, 9 | `community1` TEXT, 10 | `community2` TEXT, 11 | `latitude` REAL, 12 | `longitude` REAL, 13 | `coord_accuracy` INTEGER 14 | ); 15 | CREATE INDEX idx_country on city (country collate nocase); 16 | CREATE INDEX idx_postal_code on city (postal_code collate nocase); 17 | CREATE INDEX idx_city on city (city collate nocase); 18 | CREATE INDEX idx_state1 on city (state1 collate nocase); 19 | CREATE INDEX idx_state2 on city (state2 collate nocase); 20 | 21 | CREATE TABLE `country` ( 22 | `abbreviation` TEXT collate nocase, 23 | `name` TEXT collate nocase 24 | ); 25 | CREATE INDEX idx_abbreviation on country (abbreviation collate nocase); 26 | CREATE INDEX idx_name on country (name collate nocase); 27 | 28 | CREATE TABLE `street_suffix` ( 29 | `suffix_name` TEXT collate nocase 30 | ); 31 | CREATE INDEX idx_suffix_name on street_suffix (suffix_name collate nocase); 32 | 33 | CREATE TABLE `landmark` ( 34 | `resource` TEXT collate nocase, 35 | `address` TEXT, 36 | `city` TEXT, 37 | `county` TEXT, 38 | `state` TEXT, 39 | `country` TEXT 40 | ); 41 | CREATE INDEX idx_resource on landmark (resource collate nocase); 42 | -------------------------------------------------------------------------------- /cahoots/parsers/location/data/location.sqlite.dist: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SerenitySoftware/cahoots/866336c51436343ff5e56f83f89dddc82a5693a3/cahoots/parsers/location/data/location.sqlite.dist -------------------------------------------------------------------------------- /cahoots/parsers/location/data/street_suffix.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SerenitySoftware/cahoots/866336c51436343ff5e56f83f89dddc82a5693a3/cahoots/parsers/location/data/street_suffix.csv.bz2 -------------------------------------------------------------------------------- /cahoots/parsers/location/data/street_suffix.sql: -------------------------------------------------------------------------------- 1 | .mode csv 2 | .import cahoots/parsers/location/data/street_suffix.csv street_suffix -------------------------------------------------------------------------------- /cahoots/parsers/location/landmark.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
class LandmarkParser(BaseParser):
    """
    Determines if given data is a landmark

    This is dependent on if the landmark is in our database of landmarks.
    """

    def __init__(self, config):
        """
        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """
        BaseParser.__init__(self, config, "Landmark", 100)

    @staticmethod
    def bootstrap(config):
        """
        This method is statically called to bootstrap a parser

        Compiles the case-insensitive leading "the " pattern and stores
        it in the registry under 'LP_the_regex'.

        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """
        the_regex = re.compile('^the ', re.IGNORECASE)
        registry.set('LP_the_regex', the_regex)

    @classmethod
    def find_matching_landmarks(cls, data):
        """
        Looks in the database for landmarks matching datastring

        The lookup is a prefix match (LIKE 'data%') on the resource
        column. The database handle is closed on every exit path.

        :param data: string we want to check against landmarks
        :type data: str
        :return: list of landmark attribute dicts, or None when a
            sqlite3 error occurs
        :rtype: list
        """
        database = LocationDatabase.get_database()

        try:
            cursor = database.cursor()
            rows = cursor.execute(
                'SELECT * FROM landmark WHERE resource like ?',
                (data + '%',)
            ).fetchall()
            entities = LocationDatabase.hydrate(rows, LandmarkEntity)
            entities = \
                LocationDatabase.substitute_country_data(entities, cursor)
            return [vars(x) for x in entities]
        except sqlite3.Error:
            return None
        finally:
            # Runs for success, sqlite errors and any other exception,
            # so the handle is never left open.
            database.close()

    def prepare_landmark_datastring(self, data):
        """
        Cleans up and validates the datastring

        A leading "the " is removed and surrounding whitespace is
        stripped. The cleaned string is rejected when it is longer than
        75 characters, fails NameParser.basic_validation, or contains a
        character other than whitespace, ASCII letters, digits and
        '.,-:'.

        :param data: data we want to check for being a location
        :type data: str
        :return: the cleaned up datastring, or None when it is rejected
        :rtype: str
        """
        data = registry.get('LP_the_regex').sub('', data).strip()

        if len(data) > 75:
            return None

        name_parser = NameParser(self.config)
        if not name_parser.basic_validation(data.split()):
            return None

        allowed_chars = \
            string.whitespace + string.ascii_letters + string.digits
        allowed_chars += '.,-:'

        # any() stops at the first disallowed character.
        if any(char not in allowed_chars for char in data):
            return None

        return data

    def parse(self, data):
        """
        parses for landmarks

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        data = self.prepare_landmark_datastring(data)

        if not data:
            return

        entities = self.find_matching_landmarks(data)

        if entities:
            match_count = len(entities)
            # One match scores 95; several matches lose 2 points each.
            # NOTE(review): from 48 matches upward this goes negative;
            # confirm whether a lower bound is expected downstream.
            self.confidence = \
                95 - (2 * match_count) if match_count > 1 else 95
            subtype = 'Single' if match_count == 1 else 'Multiple'
            yield self.result(subtype, self.confidence, entities)
- square thous 28 | - square chain 29 | - square chains 30 | - square furlong 31 | - square furlongs 32 | - square mile 33 | - square miles 34 | - square league 35 | - square leagues 36 | - square fathom 37 | - square fathoms 38 | - square cable 39 | - square cables 40 | - square nautical mile 41 | - square nautical miles 42 | - square link 43 | - square links 44 | - square rod 45 | - square rods -------------------------------------------------------------------------------- /cahoots/parsers/measurement/units/imperial_length.yaml: -------------------------------------------------------------------------------- 1 | system: Imperial 2 | type: Length 3 | id: imperial_length 4 | keywords: 5 | - '"' 6 | - "'" 7 | - yard 8 | - yards 9 | - foot 10 | - feet 11 | - inch 12 | - inches 13 | - thou 14 | - thous 15 | - chain 16 | - chains 17 | - furlong 18 | - furlongs 19 | - mile 20 | - miles 21 | - league 22 | - leagues 23 | - fathom 24 | - fathoms 25 | - cable 26 | - cables 27 | - nautical mile 28 | - nautical miles 29 | - link 30 | - links 31 | - rod 32 | - rods -------------------------------------------------------------------------------- /cahoots/parsers/measurement/units/imperial_mass.yaml: -------------------------------------------------------------------------------- 1 | system: Imperial 2 | type: Mass 3 | id: imperial_mass 4 | keywords: 5 | - grain 6 | - grains 7 | - drachm 8 | - drachms 9 | - ounce 10 | - ounces 11 | - pound 12 | - pounds 13 | - lb 14 | - lbs 15 | - stone 16 | - stones 17 | - ton 18 | - tons -------------------------------------------------------------------------------- /cahoots/parsers/measurement/units/imperial_temperature.yaml: -------------------------------------------------------------------------------- 1 | system: Imperial 2 | type: Temperature 3 | id: imperial_temperature 4 | keywords: 5 | - fahrenheit 6 | - deg fahrenheit 7 | - degrees fahrenheit 8 | - degree fahrenheit 9 | - °F 10 | - °Fahrenheit 11 | - ° F 12 | - ° Fahrenheit 13 | - 
deg F -------------------------------------------------------------------------------- /cahoots/parsers/measurement/units/imperial_volume.yaml: -------------------------------------------------------------------------------- 1 | system: Imperial 2 | type: Volume 3 | id: imperial_volume 4 | keywords: 5 | - cup 6 | - cups 7 | - fl oz 8 | - fluid oz 9 | - fluid ounces 10 | - gill 11 | - gills 12 | - pint 13 | - pints 14 | - quart 15 | - quarts 16 | - gallon 17 | - gallons 18 | - fluid drachm 19 | - fluid drachms 20 | - fl drachm 21 | - fl drachms 22 | - minim 23 | - minims 24 | - fluid scruple 25 | - fluid scruples 26 | - fl scruple 27 | - fl scruples -------------------------------------------------------------------------------- /cahoots/parsers/measurement/units/metric_area.yaml: -------------------------------------------------------------------------------- 1 | system: Metric 2 | type: Area 3 | id: metric_area 4 | keywords: 5 | - hectare 6 | - hectares 7 | - square meter 8 | - square meters 9 | - square metre 10 | - square metres 11 | - square nanometer 12 | - square nanometers 13 | - square micrometer 14 | - square micrometers 15 | - square millimeter 16 | - square millimeters 17 | - square centimeter 18 | - square centimeters 19 | - square decimeter 20 | - square decimeters 21 | - square hectometer 22 | - square hectometers 23 | - square kilometer 24 | - square kilometers 25 | - square nm 26 | - square mcm 27 | - square mm 28 | - square cm 29 | - square dm 30 | - square hm 31 | - square km 32 | - sq/nm 33 | - sq/mcm 34 | - sq/mm 35 | - sq/cm 36 | - sq/dm 37 | - sq/hm 38 | - sq/km 39 | - sq nm 40 | - sq mcm 41 | - sq mm 42 | - sq cm 43 | - sq dm 44 | - sq hm 45 | - sq km 46 | - sqnm 47 | - sqmcm 48 | - sqmm 49 | - sqcm 50 | - sqdm 51 | - sqhm 52 | - sqkm -------------------------------------------------------------------------------- /cahoots/parsers/measurement/units/metric_length.yaml: 
-------------------------------------------------------------------------------- 1 | system: Metric 2 | type: Length 3 | id: metric_Length 4 | keywords: 5 | - meter 6 | - meters 7 | - nanometer 8 | - nanometers 9 | - micrometer 10 | - micrometers 11 | - millimeter 12 | - millimeters 13 | - centimeter 14 | - centimeters 15 | - decimeter 16 | - decimeters 17 | - hectometer 18 | - hectometers 19 | - kilometer 20 | - kilometers 21 | - metre 22 | - metres 23 | - nanometre 24 | - nanometres 25 | - micrometre 26 | - micrometres 27 | - millimetre 28 | - millimetres 29 | - centimetre 30 | - centimetres 31 | - decimetre 32 | - decimetres 33 | - hectometre 34 | - hectometres 35 | - kilometre 36 | - kilometres 37 | - nm 38 | - mcm 39 | - mm 40 | - cm 41 | - dm 42 | - hm 43 | - km -------------------------------------------------------------------------------- /cahoots/parsers/measurement/units/metric_mass.yaml: -------------------------------------------------------------------------------- 1 | system: Metric 2 | type: Mass 3 | id: metric_mass 4 | keywords: 5 | - gram 6 | - grams 7 | - milligram 8 | - milligrams 9 | - centigram 10 | - centigrams 11 | - decigram 12 | - decigrams 13 | - kilogram 14 | - kilograms 15 | - mg 16 | - cg 17 | - dg 18 | - kg 19 | - tonne 20 | - tonnes 21 | - metric tonne 22 | - metric tonnes -------------------------------------------------------------------------------- /cahoots/parsers/measurement/units/metric_temperature.yaml: -------------------------------------------------------------------------------- 1 | system: Metric 2 | type: Temperature 3 | id: metric_temperature 4 | keywords: 5 | - celcius 6 | - deg celcius 7 | - degrees celcius 8 | - degree celcius 9 | - °C 10 | - °Celcius 11 | - ° C 12 | - ° Celcius 13 | - deg C 14 | - kelvin 15 | - deg kelvin 16 | - degrees kelvin 17 | - degree kelvin 18 | - °K 19 | - °Kelvin 20 | - ° K 21 | - ° Kelvin 22 | - deg K -------------------------------------------------------------------------------- 
/cahoots/parsers/measurement/units/metric_volume.yaml: -------------------------------------------------------------------------------- 1 | system: Metric 2 | type: Volume 3 | id: metric_volume 4 | keywords: 5 | - liter 6 | - liters 7 | - nanoliter 8 | - nanoliters 9 | - microliter 10 | - microliters 11 | - milliliter 12 | - milliliters 13 | - centiliter 14 | - centiliters 15 | - deciliter 16 | - deciliters 17 | - hectoliter 18 | - hectoliters 19 | - kiloliter 20 | - kiloliters 21 | - litre 22 | - litres 23 | - nanolitre 24 | - nanolitres 25 | - microlitre 26 | - microlitres 27 | - millilitre 28 | - millilitres 29 | - centilitre 30 | - centilitres 31 | - decilitre 32 | - decilitres 33 | - hectolitre 34 | - hectolitres 35 | - kilolitre 36 | - kilolitres 37 | - l 38 | - nl 39 | - mcl 40 | - ml 41 | - cl 42 | - dl 43 | - hl 44 | - kl 45 | - cubic meters 46 | - cubic nanometer 47 | - cubic nanometers 48 | - cubic micrometer 49 | - cubic micrometers 50 | - cubic millimeter 51 | - cubic millimeters 52 | - cubic centimeter 53 | - cubic centimeters 54 | - cubic decimeter 55 | - cubic decimeters 56 | - cubic hectometer 57 | - cubic hectometers 58 | - cubic kilometer 59 | - cubic kilometers 60 | - meters cubed 61 | - nanometer cubed 62 | - nanometers cubed 63 | - micrometer cubed 64 | - micrometers cubed 65 | - millimeter cubed 66 | - millimeters cubed 67 | - centimeter cubed 68 | - centimeters cubed 69 | - decimeter cubed 70 | - decimeters cubed 71 | - hectometer cubed 72 | - hectometers cubed 73 | - kilometer cubed 74 | - kilometers cubed 75 | - cubic metres 76 | - cubic nanometre 77 | - cubic nanometres 78 | - cubic micrometre 79 | - cubic micrometres 80 | - cubic millimetre 81 | - cubic millimetres 82 | - cubic centimetre 83 | - cubic centimetres 84 | - cubic decimetre 85 | - cubic decimetres 86 | - cubic hectometre 87 | - cubic hectometres 88 | - cubic kilometre 89 | - cubic kilometres 90 | - metres cubed 91 | - nanometre cubed 92 | - nanometres cubed 93 | - micrometre 
cubed 94 | - micrometres cubed 95 | - millimetre cubed 96 | - millimetres cubed 97 | - centimetre cubed 98 | - centimetres cubed 99 | - decimetre cubed 100 | - decimetres cubed 101 | - hectometre cubed 102 | - hectometres cubed 103 | - kilometre cubed 104 | - kilometres cubed 105 | - cubic nm 106 | - cubic mcm 107 | - cubic mm 108 | - cubic cm 109 | - cubic dm 110 | - cubic hm 111 | - cubic km 112 | - nm cubed 113 | - mcm cubed 114 | - mm cubed 115 | - cm cubed 116 | - dm cubed 117 | - hm cubed 118 | - km cubed -------------------------------------------------------------------------------- /cahoots/parsers/measurement/units/misc_length.yaml: -------------------------------------------------------------------------------- 1 | system: Miscellaneous 2 | type: Length 3 | id: miscellaneous_length 4 | keywords: 5 | - astronomical unit 6 | - astronomical units 7 | - AU 8 | - 'A.U.' 9 | - parsec 10 | - parsecs -------------------------------------------------------------------------------- /cahoots/parsers/programming/LICENSES/LICENSES_C: -------------------------------------------------------------------------------- 1 | LICENSES FOR ALL SNIPPETS USED IN THE PROGRAMMING PARSER BAYES TRAINER ARE INCLUDED HERE. 2 | ALL SNIPPETS ARE TAKEN FROM PROJECTS USING EITHER THE BSD OR MIT LICENSES. 3 | 4 | 5 | ------------------------------------ 6 | https://github.com/b4winckler/macvim 7 | ------------------------------------ 8 | 9 | Vim is Charityware. You can use and copy it as much as you like, but you are 10 | encouraged to make a donation to help orphans in Uganda. Please read the file 11 | "runtime/doc/uganda.txt" for details (do ":help uganda" inside Vim). 12 | 13 | Summary of the license: There are no restrictions on using or distributing an 14 | unmodified copy of Vim. Parts of Vim may also be distributed, but the license 15 | text must always be included. For modified versions a few restrictions apply. 
16 | The license is GPL compatible, you may compile Vim with GPL libraries and 17 | distribute it. 18 | 19 | 20 | ----------------------------------- 21 | https://github.com/okamstudio/godot 22 | ----------------------------------- 23 | 24 | GODOT ENGINE 25 | http://www.godotengine.org 26 | ********************************************************************** 27 | Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 28 | 29 | Permission is hereby granted, free of charge, to any person obtaining 30 | a copy of this software and associated documentation files (the 31 | "Software"), to deal in the Software without restriction, including 32 | without limitation the rights to use, copy, modify, merge, publish, 33 | distribute, sublicense, and/or sell copies of the Software, and to 34 | permit persons to whom the Software is furnished to do so, subject to 35 | the following conditions: 36 | 37 | The above copyright notice and this permission notice shall be 38 | included in all copies or substantial portions of the Software. 39 | 40 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 41 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 42 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 43 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 44 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 45 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 46 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
47 | 48 | 49 | ********************************************************************** 50 | 51 | 52 | --- 53 | https://github.com/seanpringle/goomwwm 54 | --- 55 | 56 | MIT/X11 License 57 | Copyright (c) 2012 Sean Pringle 58 | 59 | Permission is hereby granted, free of charge, to any person obtaining 60 | a copy of this software and associated documentation files (the 61 | "Software"), to deal in the Software without restriction, including 62 | without limitation the rights to use, copy, modify, merge, publish, 63 | distribute, sublicense, and/or sell copies of the Software, and to 64 | permit persons to whom the Software is furnished to do so, subject to 65 | the following conditions: 66 | 67 | The above copyright notice and this permission notice shall be 68 | included in all copies or substantial portions of the Software. 69 | 70 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 71 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 72 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 73 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 74 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 75 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 76 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 77 | 78 | -- 79 | https://github.com/kr/beanstalkd 80 | -- 81 | 82 | Copyright (c) 2007-2011 The Beanstalkd Authors. 
83 | 84 | Permission is hereby granted, free of charge, to any person obtaining a copy of 85 | this software and associated documentation files (the "Software"), to deal in 86 | the Software without restriction, including without limitation the rights to 87 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 88 | of the Software, and to permit persons to whom the Software is furnished to do 89 | so, subject to the following conditions: 90 | 91 | The above copyright notice and this permission notice shall be included in all 92 | copies or substantial portions of the Software. 93 | 94 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 95 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 96 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 97 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 98 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 99 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 100 | SOFTWARE. 101 | -------------------------------------------------------------------------------- /cahoots/parsers/programming/LICENSES/LICENSES_CPP: -------------------------------------------------------------------------------- 1 | LICENSES FOR ALL SNIPPETS USED IN THE PROGRAMMING PARSER BAYES TRAINER ARE INCLUDED HERE. 2 | ALL SNIPPETS ARE TAKEN FROM PROJECTS USING EITHER THE BSD OR MIT LICENSES. 3 | 4 | 5 | ---------------------------------- 6 | https://github.com/ariya/phantomjs 7 | ---------------------------------- 8 | 9 | Copyright (C) 2012 Milian Wolff, KDAB 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | * Redistributions of source code must retain the above copyright 15 | notice, this list of conditions and the following disclaimer. 
16 | * Redistributions in binary form must reproduce the above copyright 17 | notice, this list of conditions and the following disclaimer in the 18 | documentation and/or other materials provided with the distribution. 19 | * Neither the name of the <organization> nor the 20 | names of its contributors may be used to endorse or promote products 21 | derived from this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 27 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 28 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 30 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 32 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | 35 | ---------------------------------- 36 | https://github.com/bitcoin/bitcoin 37 | ---------------------------------- 38 | 39 | // Copyright (c) 2010 Satoshi Nakamoto 40 | // Copyright (c) 2009-2015 The Bitcoin Core developers 41 | // Distributed under the MIT software license, see the accompanying 42 | // file COPYING or http://www.opensource.org/licenses/mit-license.php.
43 | 44 | Copyright (c) 2009-2015 Bitcoin Developers 45 | 46 | Permission is hereby granted, free of charge, to any person obtaining a copy 47 | of this software and associated documentation files (the "Software"), to deal 48 | in the Software without restriction, including without limitation the rights 49 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 50 | copies of the Software, and to permit persons to whom the Software is 51 | furnished to do so, subject to the following conditions: 52 | 53 | The above copyright notice and this permission notice shall be included in 54 | all copies or substantial portions of the Software. 55 | 56 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 57 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 58 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 59 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 60 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 61 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 62 | THE SOFTWARE. 63 | 64 | 65 | -------------------------------- 66 | https://github.com/Itseez/opencv 67 | -------------------------------- 68 | 69 | License Agreement 70 | For Open Source Computer Vision Library 71 | (3-clause BSD License) 72 | 73 | Redistribution and use in source and binary forms, with or without modification, 74 | are permitted provided that the following conditions are met: 75 | 76 | * Redistributions of source code must retain the above copyright notice, 77 | this list of conditions and the following disclaimer. 78 | 79 | * Redistributions in binary form must reproduce the above copyright notice, 80 | this list of conditions and the following disclaimer in the documentation 81 | and/or other materials provided with the distribution. 
82 | 83 | * Neither the names of the copyright holders nor the names of the contributors 84 | may be used to endorse or promote products derived from this software 85 | without specific prior written permission. 86 | 87 | This software is provided by the copyright holders and contributors "as is" and 88 | any express or implied warranties, including, but not limited to, the implied 89 | warranties of merchantability and fitness for a particular purpose are disclaimed. 90 | In no event shall copyright holders or contributors be liable for any direct, 91 | indirect, incidental, special, exemplary, or consequential damages 92 | (including, but not limited to, procurement of substitute goods or services; 93 | loss of use, data, or profits; or business interruption) however caused 94 | and on any theory of liability, whether in contract, strict liability, 95 | or tort (including negligence or otherwise) arising in any way out of 96 | the use of this software, even if advised of the possibility of such damage. 97 | -------------------------------------------------------------------------------- /cahoots/parsers/programming/bayesian.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | from SereneRegistry import registry 25 | import simplebayes 26 | import zipfile 27 | import os 28 | 29 | 30 | class ProgrammingBayesianClassifier(object): 31 | """ 32 | Responsible for classifying an example of source 33 | code into a specific programming language 34 | """ 35 | 36 | @staticmethod 37 | # pylint: disable=unused-argument 38 | def bootstrap(config): 39 | """ 40 | Trains the bayes classifier with examples 41 | from various programming languages 42 | 43 | :param config: cahoots config 44 | :type config: cahoots.config.BaseConfig 45 | """ 46 | classifier = simplebayes.SimpleBayes( 47 | ProgrammingBayesianClassifier.bayes_tokenizer 48 | ) 49 | 50 | directory = os.path.dirname(os.path.abspath(__file__)) 51 | 52 | trainers = {} 53 | 54 | trainer_zip = zipfile.ZipFile(directory + '/trainers.zip', 'r') 55 | for filename in trainer_zip.namelist(): 56 | language = filename.split('.')[0] 57 | trainers[language] = trainer_zip.read(filename) 58 | 59 | for language in trainers: 60 | classifier.train(language, trainers[language]) 61 | 62 | registry.set('PP_bayes', classifier) 63 | 64 | @staticmethod 65 | def bayes_tokenizer(text): 66 | """ 67 | Breaks a string down into tokens for our classifier 68 | 69 | :param text: text we want to tokenize 70 | :type text: str 71 | :return: tokenized text 72 | :rtype: list 73 | """ 74 | text = text.replace('->', ' -> ') 75 | text = text.replace('.', ' . 
') 76 | text = text.replace(')', ' ) ') 77 | text = text.replace('(', ' ( ') 78 | text = text.replace('{', ' { ') 79 | text = text.replace('}', ' } ') 80 | text = text.replace('[', ' [ ') 81 | text = text.replace(']', ' ] ') 82 | text = text.replace('$', ' $ ') 83 | text = text.replace(':', ' : ') 84 | text = text.replace('\\', ' \\ ') 85 | return text.split() 86 | 87 | @classmethod 88 | def classify(cls, data_string): 89 | """ 90 | Takes a string and creates a dict of 91 | programming language match probabilities 92 | 93 | :param data_string: the string we want to classify 94 | :type data_string: str 95 | :return: bayesian probabilities 96 | :rtype: dict 97 | """ 98 | classifier = registry.get('PP_bayes') 99 | 100 | scores = classifier.score(data_string) 101 | 102 | return scores 103 | -------------------------------------------------------------------------------- /cahoots/parsers/programming/languages/actionscript.yaml: -------------------------------------------------------------------------------- 1 | name: ActionScript 2 | id: actionscript 3 | keywords: 4 | - '-infinity' 5 | - '...rest' 6 | - array 7 | - as 8 | - as3 9 | - boolean 10 | - break 11 | - case 12 | - catch 13 | - const 14 | - continue 15 | - date 16 | - decodeuri 17 | - decodeuricomponent 18 | - default 19 | - delete 20 | - do 21 | - dynamic 22 | - each 23 | - else 24 | - encodeuri 25 | - encodeuricomponent 26 | - escape 27 | - extends 28 | - false 29 | - final 30 | - finally 31 | - 'flash_proxy' 32 | - for 33 | - get 34 | - if 35 | - implements 36 | - import 37 | - in 38 | - include 39 | - infinity 40 | - instanceof 41 | - int 42 | - internal 43 | - is 44 | - isfinite 45 | - isnan 46 | - isxmlname 47 | - label 48 | - namespace 49 | - nan 50 | - native 51 | - new 52 | - null 53 | - null 54 | - number 55 | - object 56 | - object_proxy 57 | - override 58 | - parsefloat 59 | - parseint 60 | - private 61 | - protected 62 | - public 63 | - return 64 | - set 65 | - static 66 | - string 67 | - super 68
| - switch 69 | - this 70 | - throw 71 | - true 72 | - try 73 | - typeof 74 | - uint 75 | - undefined 76 | - unescape 77 | - use 78 | - void 79 | - while 80 | - with -------------------------------------------------------------------------------- /cahoots/parsers/programming/languages/c.yaml: -------------------------------------------------------------------------------- 1 | name: C 2 | id: c 3 | keywords: 4 | - auto 5 | - double 6 | - int 7 | - struct 8 | - break 9 | - else 10 | - long 11 | - switch 12 | - case 13 | - enum 14 | - register 15 | - typedef 16 | - char 17 | - extern 18 | - return 19 | - union 20 | - const 21 | - float 22 | - short 23 | - unsigned 24 | - continue 25 | - for 26 | - signed 27 | - void 28 | - default 29 | - goto 30 | - sizeof 31 | - volatile 32 | - do 33 | - if 34 | - static 35 | - while -------------------------------------------------------------------------------- /cahoots/parsers/programming/languages/cpp.yaml: -------------------------------------------------------------------------------- 1 | name: 'C++' 2 | id: cpp 3 | keywords: 4 | - alignas 5 | - alignof 6 | - and 7 | - and_eq 8 | - asm 9 | - auto 10 | - bitand 11 | - bitor 12 | - bool 13 | - break 14 | - case 15 | - catch 16 | - char 17 | - char16_t 18 | - char32_t 19 | - class 20 | - compl 21 | - const 22 | - constexpr 23 | - const_cast 24 | - continue 25 | - cout 26 | - decltype 27 | - default 28 | - delete 29 | - do 30 | - double 31 | - dynamic_cast 32 | - else 33 | - endl 34 | - enum 35 | - explicit 36 | - export 37 | - extern 38 | - false 39 | - float 40 | - for 41 | - friend 42 | - goto 43 | - if 44 | - inline 45 | - int 46 | - long 47 | - mutable 48 | - namespace 49 | - new 50 | - noexcept 51 | - not 52 | - not_eq 53 | - nullptr 54 | - operator 55 | - or 56 | - or_eq 57 | - private 58 | - protected 59 | - public 60 | - register 61 | - reinterpret_cast 62 | - return 63 | - short 64 | - signed 65 | - sizeof 66 | - static 67 | - static_assert 68 | - static_cast 69 | - 
struct 70 | - switch 71 | - template 72 | - this 73 | - thread_local 74 | - throw 75 | - true 76 | - try 77 | - typedef 78 | - typeid 79 | - typename 80 | - union 81 | - unsigned 82 | - using 83 | - virtual 84 | - void 85 | - volatile 86 | - wchar_t 87 | - while 88 | - xor 89 | - xor_eq -------------------------------------------------------------------------------- /cahoots/parsers/programming/languages/cs.yaml: -------------------------------------------------------------------------------- 1 | name: 'C#' 2 | id: cs 3 | keywords: 4 | - abstract 5 | - break 6 | - char 7 | - continue 8 | - do 9 | - event 10 | - finally 11 | - foreach 12 | - in 13 | - internal 14 | - namespace 15 | - operator 16 | - params 17 | - readonly 18 | - sealed 19 | - static 20 | - this 21 | - typeof 22 | - unsafe 23 | - void 24 | - add 25 | - descending 26 | - get 27 | - into 28 | - orderby 29 | - remove 30 | - value 31 | - where 32 | - as 33 | - byte 34 | - checked 35 | - decimal 36 | - double 37 | - explicit 38 | - fixed 39 | - goto 40 | - in 41 | - is 42 | - new 43 | - out 44 | - private 45 | - ref 46 | - short 47 | - string 48 | - throw 49 | - uint 50 | - ushort 51 | - volatile 52 | - alias 53 | - dynamic 54 | - global 55 | - join 56 | - partial 57 | - select 58 | - var 59 | - yield 60 | - base 61 | - case 62 | - class 63 | - default 64 | - else 65 | - extern 66 | - float 67 | - if 68 | - int 69 | - lock 70 | - null 71 | - out 72 | - protected 73 | - return 74 | - sizeof 75 | - struct 76 | - true 77 | - ulong 78 | - using 79 | - while 80 | - ascending 81 | - from 82 | - group 83 | - let 84 | - partial 85 | - set 86 | - where 87 | - bool 88 | - catch 89 | - const 90 | - delegate 91 | - enum 92 | - false 93 | - for 94 | - implicit 95 | - interface 96 | - long 97 | - object 98 | - override 99 | - public 100 | - sbyte 101 | - stackalloc 102 | - switch 103 | - try 104 | - unchecked 105 | - virtual -------------------------------------------------------------------------------- 
/cahoots/parsers/programming/languages/java.yaml: -------------------------------------------------------------------------------- 1 | name: Java 2 | id: java 3 | keywords: 4 | - abstract 5 | - continue 6 | - goto 7 | - package 8 | - switch 9 | - assert 10 | - default 11 | - if 12 | - private 13 | - this 14 | - boolean 15 | - do 16 | - implements 17 | - protected 18 | - throw 19 | - break 20 | - double 21 | - import 22 | - public 23 | - throws 24 | - byte 25 | - else 26 | - instanceof 27 | - return 28 | - transient 29 | - case 30 | - extends 31 | - int 32 | - short 33 | - try 34 | - catch 35 | - final 36 | - interface 37 | - static 38 | - void 39 | - char 40 | - finally 41 | - long 42 | - strictfp 43 | - volatile 44 | - class 45 | - float 46 | - native 47 | - super 48 | - while 49 | - const 50 | - for 51 | - new 52 | - synchronized -------------------------------------------------------------------------------- /cahoots/parsers/programming/languages/javascript.yaml: -------------------------------------------------------------------------------- 1 | name: JavaScript 2 | id: javascript 3 | keywords: 4 | - break 5 | - case 6 | - comment 7 | - abstract 8 | - boolean 9 | - byte 10 | - char 11 | - double 12 | - false 13 | - final 14 | - float 15 | - goto 16 | - catch 17 | - class 18 | - const 19 | - debugger 20 | - alert 21 | - anchor 22 | - area 23 | - arguments 24 | - array 25 | - assign 26 | - blur 27 | - boolean 28 | - button 29 | - callee 30 | - caller 31 | - captureevents 32 | - checkbox 33 | - clearinterval 34 | - cleartimeout 35 | - close 36 | - closed 37 | - confirm 38 | - constructor 39 | - date 40 | - defaultstatus 41 | - document 42 | - document 43 | - element 44 | - escape 45 | - do 46 | - else 47 | - export 48 | - protected 49 | - public 50 | - short 51 | - static 52 | - synchronized 53 | - throws 54 | - transient 55 | - true 56 | - for 57 | - function 58 | - if 59 | - import 60 | - in 61 | - label 62 | - this 63 | - typeof 64 | - var 65 | - void 66 | - 
while 67 | - with 68 | - new 69 | - return 70 | - switch 71 | - throw 72 | - try 73 | - scrollto 74 | - select 75 | - self 76 | - setinterval 77 | - settimeout 78 | - status 79 | - statusbar 80 | - stop 81 | - string 82 | - submit 83 | - sun 84 | - taint 85 | - text 86 | - textarea 87 | - toolbar 88 | - top 89 | - tostring 90 | - unescape 91 | - untaint 92 | - unwatch 93 | - valueof 94 | - watch 95 | - window 96 | - window 97 | - outerheight 98 | - outerwidth 99 | - packages 100 | - pagexoffset 101 | - pageyoffset 102 | - parent 103 | - parsefloat 104 | - parseint 105 | - password 106 | - personalbar 107 | - plugin 108 | - print 109 | - prompt 110 | - prototype 111 | - radio 112 | - ref 113 | - regexp 114 | - releaseevents 115 | - reset 116 | - resizeby 117 | - resizeto 118 | - routeevent 119 | - scroll 120 | - scrollbars 121 | - scrollby 122 | - link 123 | - location 124 | - location 125 | - locationbar 126 | - math 127 | - menubar 128 | - mimetype 129 | - moveby 130 | - moveto 131 | - name 132 | - nan 133 | - navigate 134 | - navigator 135 | - navigator 136 | - netscape 137 | - number 138 | - object 139 | - onblur 140 | - onerror 141 | - onfocus 142 | - onload 143 | - onunload 144 | - open 145 | - opener 146 | - option 147 | - continue 148 | - default 149 | - delete 150 | - implements 151 | - instanceof 152 | - int 153 | - interface 154 | - long 155 | - native 156 | - null 157 | - package 158 | - private 159 | - enum 160 | - extends 161 | - finally 162 | - super 163 | - eval 164 | - fileupload 165 | - find 166 | - focus 167 | - form 168 | - frame 169 | - frames 170 | - function 171 | - getclass 172 | - hidden 173 | - history 174 | - history 175 | - home 176 | - image 177 | - infinity 178 | - innerheight 179 | - innerwidth 180 | - isfinite 181 | - isnan 182 | - java 183 | - javaarray 184 | - javaclass 185 | - javaobject 186 | - javapackage 187 | - length -------------------------------------------------------------------------------- 
/cahoots/parsers/programming/languages/perl.yaml: -------------------------------------------------------------------------------- 1 | name: Perl 2 | id: perl 3 | keywords: 4 | - '__data__' 5 | - '__end__' 6 | - '__file__' 7 | - '__line__' 8 | - '__package__' 9 | - and 10 | - cmp 11 | - continue 12 | - core 13 | - do 14 | - else 15 | - elsif 16 | - eq 17 | - exp 18 | - for 19 | - foreach 20 | - ge 21 | - gt 22 | - if 23 | - le 24 | - lock 25 | - lt 26 | - m 27 | - ne 28 | - no 29 | - or 30 | - package 31 | - q 32 | - qq 33 | - qr 34 | - qw 35 | - qx 36 | - s 37 | - sub 38 | - tr 39 | - unless 40 | - until 41 | - while 42 | - xor 43 | - y -------------------------------------------------------------------------------- /cahoots/parsers/programming/languages/php.yaml: -------------------------------------------------------------------------------- 1 | name: PHP 2 | id: php 3 | keywords: 4 | - '__halt_compiler' 5 | - '__construct' 6 | - abstract 7 | - array 8 | - break 9 | - as 10 | - do 11 | - for 12 | - callable 13 | - case 14 | - catch 15 | - class 16 | - clone 17 | - const 18 | - continue 19 | - declare 20 | - die 21 | - echo 22 | - else 23 | - elseif 24 | - empty 25 | - enddeclare 26 | - endfor 27 | - endforeach 28 | - endif 29 | - endswitch 30 | - endwhile 31 | - eval 32 | - exit 33 | - extends 34 | - final 35 | - for 36 | - foreach 37 | - function 38 | - global 39 | - goto 40 | - if 41 | - implements 42 | - include 43 | - include_once 44 | - instanceof 45 | - insteadof 46 | - interface 47 | - isset 48 | - list 49 | - namespace 50 | - new 51 | - or 52 | - print 53 | - private 54 | - protected 55 | - public 56 | - require 57 | - require_once 58 | - return 59 | - static 60 | - switch 61 | - throw 62 | - trait 63 | - try 64 | - unset 65 | - use 66 | - var 67 | - while 68 | - xor 69 | - '__class__' 70 | - '__dir__' 71 | - '__file__' 72 | - '__function__' 73 | - '__line__' 74 | - '__method__' 75 | - '__namespace__' 76 | - '__trait__' 
-------------------------------------------------------------------------------- /cahoots/parsers/programming/languages/python.yaml: -------------------------------------------------------------------------------- 1 | name: Python 2 | id: python 3 | keywords: 4 | - and 5 | - del 6 | - from 7 | - not 8 | - while 9 | - as 10 | - elif 11 | - global 12 | - or 13 | - with 14 | - assert 15 | - else 16 | - if 17 | - pass 18 | - yield 19 | - break 20 | - except 21 | - import 22 | - print 23 | - class 24 | - exec 25 | - in 26 | - raise 27 | - continue 28 | - finally 29 | - is 30 | - return 31 | - def 32 | - for 33 | - lambda 34 | - try -------------------------------------------------------------------------------- /cahoots/parsers/programming/languages/ruby.yaml: -------------------------------------------------------------------------------- 1 | name: Ruby 2 | id: ruby 3 | keywords: 4 | - alias 5 | - and 6 | - begin 7 | - break 8 | - case 9 | - class 10 | - def 11 | - 'defined?' 12 | - do 13 | - else 14 | - elsif 15 | - end 16 | - ensure 17 | - false 18 | - for 19 | - if 20 | - in 21 | - module 22 | - next 23 | - nil 24 | - not 25 | - or 26 | - redo 27 | - rescue 28 | - retry 29 | - return 30 | - self 31 | - super 32 | - then 33 | - true 34 | - undef 35 | - unless 36 | - until 37 | - when 38 | - while 39 | - yield -------------------------------------------------------------------------------- /cahoots/parsers/programming/languages/vb.yaml: -------------------------------------------------------------------------------- 1 | name: Visual Basic 2 | id: vb 3 | keywords: 4 | - addhandler 5 | - andalso 6 | - byte 7 | - catch 8 | - cdate 9 | - cint 10 | - cobj 11 | - cshort 12 | - cuint 13 | - decimal 14 | - dim 15 | - each 16 | - end 17 | - error 18 | - finally 19 | - function 20 | - global 21 | - if 22 | - imports 23 | - inherits 24 | - isnot 25 | - long 26 | - module 27 | - mybase 28 | - new constraint 29 | - not 30 | - object 31 | - option 32 | - out (generic modifier) 33 | 
- paramarray 34 | - protected 35 | - redim 36 | - return 37 | - shadows 38 | - static 39 | - structure constraint 40 | - then 41 | - try 42 | - ulong 43 | - wend 44 | - with 45 | - '#const' 46 | - '#if' 47 | - addressof 48 | - as 49 | - byval 50 | - cbool 51 | - cdbl 52 | - class constraint 53 | - const 54 | - csng 55 | - culng 56 | - declare 57 | - directcast 58 | - else 59 | - endif 60 | - event 61 | - for (in for…next) 62 | - get 63 | - gosub 64 | - if() 65 | - imports (xml namespace) 66 | - integer 67 | - let 68 | - loop 69 | - module statement 70 | - myclass 71 | - new operator 72 | - nothing 73 | - of 74 | - optional 75 | - overloads 76 | - partial 77 | - public 78 | - rem 79 | - sbyte 80 | - shared 81 | - step 82 | - structure statement 83 | - throw 84 | - trycast 85 | - ushort 86 | - when 87 | - withevents 88 | - '#else' 89 | - alias 90 | - boolean 91 | - call 92 | - cbyte 93 | - cdec 94 | - class statement 95 | - continue 96 | - cstr 97 | - cushort 98 | - default 99 | - do 100 | - elseif 101 | - enum 102 | - exit 103 | - for each…next 104 | - gettype 105 | - goto 106 | - implements 107 | - in 108 | - interface 109 | - lib 110 | - me 111 | - mustinherit 112 | - namespace 113 | - next 114 | - notinheritable 115 | - on 116 | - or 117 | - overridable 118 | - private 119 | - raiseevent 120 | - removehandler 121 | - select 122 | - short 123 | - stop 124 | - sub 125 | - to 126 | - typeof…is 127 | - using 128 | - while 129 | - writeonly 130 | - '#elseif' 131 | - and 132 | - byref 133 | - case 134 | - cchar 135 | - char 136 | - clng 137 | - csbyte 138 | - ctype 139 | - date 140 | - delegate 141 | - double 142 | - end statement 143 | - erase 144 | - false 145 | - friend 146 | - getxmlnamespace 147 | - handles 148 | - implements statement 149 | - in (generic modifier) 150 | - is 151 | - like 152 | - mod 153 | - mustoverride 154 | - narrowing 155 | - next (in resume) 156 | - notoverridable 157 | - operator 158 | - orelse 159 | - overrides 160 | - property 161 | - 
readonly 162 | - resume 163 | - set 164 | - single 165 | - string 166 | - synclock 167 | - true 168 | - uinteger 169 | - variant 170 | - widening 171 | - xor 172 | - '#end' 173 | - aggregate 174 | - binary 175 | - equals 176 | - group join 177 | - join 178 | - order by 179 | - strict 180 | - unicode 181 | - '#region' 182 | - ansi 183 | - compare 184 | - explicit 185 | - into 186 | - key (visual basic) 187 | - preserve 188 | - take 189 | - until 190 | - assembly 191 | - custom 192 | - from 193 | - isfalse 194 | - mid 195 | - skip 196 | - take while 197 | - where 198 | - auto 199 | - distinct 200 | - group by 201 | - istrue 202 | - off 203 | - skip while 204 | - text 205 | - '#externalsource' -------------------------------------------------------------------------------- /cahoots/parsers/programming/lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
class ProgrammingLexerThread(threading.Thread):
    """Worker thread that runs one pygments lexer over one string"""
    lexer = None
    data_string = None
    result = None

    def __init__(self, lexer_id, lexer, data_string):
        """
        :param lexer_id: the id of the lexer
        :type lexer_id: str
        :param lexer: the lexer this thread is going to use
        :type lexer: pygments.lexer.Lexer
        :param data_string: the string we're going to lex
        :type data_string: str
        """
        self.thread_id = lexer_id
        self.lexer = lexer
        self.data_string = data_string
        threading.Thread.__init__(self)

    def run(self):
        """
        Lexes the data string and stores the outcome in ``self.result``:
        the number of meaningful tokens found, or False when the lexer
        produced an error token or no meaningful tokens at all.
        """
        # Token types that carry no signal about which language this is
        ignored = (Token.Text, Token.Name, Token.Name.Other)

        found = []
        for token_type, value in lex(self.data_string, self.lexer):
            if value == '' or token_type in ignored:
                continue
            found.append(token_type)

        # Errors mean we definitely didn't find the right language
        if not found or Token.Error in found:
            self.result = False
        else:
            self.result = len(found)


class ProgrammingLexer(object):
    """Runs several programming language lexers against one string"""

    # One shared lexer instance per supported language id
    lexers = {
        'actionscript': ActionScript3Lexer(),
        'c': CLexer(),
        'cpp': CppLexer(),
        'cs': CSharpLexer(),
        'java': JavaLexer(),
        'javascript': JavascriptLexer(),
        'perl': PerlLexer(),
        'php': PhpLexer(startinline=True),
        'python': PythonLexer(),
        'ruby': RubyLexer(),
        'vb': VbNetLexer(),
    }

    matched_languages = []
    data = None

    def __init__(self, matched_langs, data_string):
        """
        :param matched_langs: languages that we've detected tokens for
        :type matched_langs: list
        :param data_string: the data we want to lex for possible langs
        :type data_string: str
        """
        self.matched_languages = matched_langs
        self.data = data_string

    def lex(self):
        """
        Starts one lexer thread per matched language that has a lexer,
        waits for all of them, and keeps only the languages whose lexer
        finished with a truthy result (no errors, at least one token).

        :return: token counts keyed by lexer id
        :rtype: dict
        """
        workers = []

        for lang_id, lang_lexer in self.lexers.items():
            if lang_id not in self.matched_languages:
                continue
            # Creating a thread for each lexer
            worker = ProgrammingLexerThread(lang_id, lang_lexer, self.data)
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        return {
            worker.thread_id: worker.result
            for worker in workers
            if worker.result
        }
class URIParser(BaseParser):
    """Determines if given data is a URI of some form"""

    def __init__(self, config):
        """
        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """
        # NOTE(review): 100 is presumably the starting confidence that
        # parse() lowers below -- confirm against BaseParser.
        BaseParser.__init__(self, config, "URI", 100)

    @classmethod
    def is_ipv6_address(cls, address):
        """
        Checks if the data is an ipv6 address

        :param address: potential ip address
        :type address: str
        :return: if this is an ip addr or not
        :rtype: bool
        """
        try:
            socket.inet_pton(socket.AF_INET6, address)
        except (socket.error, ValueError, TypeError):
            # ValueError covers UnicodeEncodeError (its subclass) as well
            # as the "embedded null character" error; older interpreters
            # report the latter as TypeError. None of these inputs can be
            # an address, so answer False instead of raising.
            return False

        return True

    @classmethod
    def is_ipv4_address(cls, address):
        """
        Checks if the data is an ipv4 address

        :param address: potential ip address
        :type address: str
        :return: if this is an ip addr or not
        :rtype: bool
        """
        try:
            socket.inet_aton(address)
        except (socket.error, ValueError, TypeError):
            # Same reasoning as is_ipv6_address: unparseable means "no".
            return False

        return True

    @classmethod
    def is_valid_url(cls, url):
        """
        Tries to parse a URL to see if it's valid. A URL is accepted when
        it has both a scheme and a network location, and the network
        location consists only of ascii letters, digits, '-' and '.'.

        :param url: text that might be a url
        :type url: str
        :return: if this is a valid url or not
        :rtype: bool
        """
        try:
            pieces = urlparse(url)
        except ValueError:
            # urlparse raises on malformed network locations (for example
            # an unbalanced "[" as in "http://[abcd.com"); that is simply
            # not a valid URL rather than a reason to abort parsing.
            return False

        if not (pieces.scheme and pieces.netloc):
            return False

        allowed = set(string.ascii_letters + string.digits + '-.')
        return set(pieces.netloc) <= allowed

    def parse(self, data_string):
        """
        parses for uris

        :param data_string: the string we want to parse
        :type data_string: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        if len(data_string) < 4:
            return

        dot_count = data_string.count(".")

        # NOTE(review): the confidence adjustments below mutate the
        # instance and are never restored here, so a later URL result
        # inherits an earlier IPv4 deduction -- confirm this is intended.
        if dot_count >= 2 or data_string.count(":") >= 2:
            if self.is_ipv4_address(data_string):
                # lowering the confidence because "Technically"
                # an ipv4 address could be a phone number
                self.confidence -= 5
                # if there's whitespace in the ip address, lower confidence
                if strings_intersect(string.whitespace, data_string):
                    self.confidence -= 50
                yield self.result("IP Address (v4)")

            elif self.is_ipv6_address(data_string):
                yield self.result("IP Address (v6)")

        letters = [c for c in data_string if c in string.ascii_letters]

        if dot_count > 0 and len(letters) >= 4:
            if self.is_valid_url(data_string):
                yield self.result("URL")

            elif '://' not in data_string:
                if self.is_valid_url('http://' + data_string):
                    # confidence hit since we had to modify the data
                    self.confidence -= 25
                    yield self.result("URL", data='http://'+data_string)
2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
# pylint: disable=too-many-arguments


class ParseResult(object):
    """Represents a single result from the parsing process"""

    # Class-level defaults; __init__ overrides every one of them per
    # instance, so the dict below is never shared between results.
    type = "Unknown"
    subtype = "Unknown"
    confidence = 0
    result_value = None
    data = {}

    def __init__(
            self,
            p_type="Unknown",
            subtype="Unknown",
            confidence=0,
            value=None,
            additional_data=None
    ):
        """
        Sets up the parse result object with result data

        :param p_type: Parse result type
        :type p_type: str
        :param subtype: Parse result subtype
        :type subtype: str
        :param confidence: How confident we are in this result, 1-100
        :type confidence: int
        :param value: representation of the parsed data
        :type value: mixed
        :param additional_data: any additional data a parser wants to
            provide; a fresh empty dict is used when omitted
        :type additional_data: mixed
        """
        if additional_data is None:
            additional_data = {}

        self.type = p_type
        self.subtype = subtype
        self.confidence = confidence
        self.result_value = value
        self.data = additional_data

    def __repr__(self):
        """
        Readable representation for logs and debugging, spelled with the
        constructor's own keyword names.

        :return: constructor-style description of this result
        :rtype: str
        """
        return (
            "%s(p_type=%r, subtype=%r, confidence=%r, "
            "value=%r, additional_data=%r)"
        ) % (
            type(self).__name__,
            self.type,
            self.subtype,
            self.confidence,
            self.result_value,
            self.data,
        )
# pylint: disable=invalid-name


def truncate_text(text, limit=80):
    """
    Truncates text to a provided length. Text longer than the limit is
    cut and ends in "..." so that the result is exactly ``limit``
    characters long. When the limit leaves no room for the ellipsis
    (below 3) the text is hard-cut instead, so the result never exceeds
    the limit; a negative limit is treated as 0.

    :param text: text we want to truncate
    :type text: str
    :param limit: how long we want the resulting string to be
    :type limit: int
    :return: truncated string
    :rtype: str
    """
    if len(text) <= limit:
        return text

    if limit < 3:
        # limit - 3 would be negative here and slice from the wrong end,
        # yielding a result longer than the limit.
        return text[:max(limit, 0)]

    return text[:limit - 3] + "..."


def is_number(text):
    """
    Checks if the text is a number, i.e. whether float() accepts it once
    surrounding whitespace is stripped.

    :param text: text we want to examine
    :type text: str
    :return: whether this is a number or not
    :rtype: bool
    """
    try:
        float(text.strip())
    except ValueError:
        return False

    return True


def strings_intersect(s_one, s_two):
    """
    Checks if two strings have at least one character in common

    :param s_one: first string
    :type s_one: str
    :param s_two: second string
    :type s_two: str
    :return: whether or not these strings intersect
    :rtype: bool
    """
    return not set(s_one).isdisjoint(s_two)
associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | -------------------------------------------------------------------------------- /cahootserver/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
class WSGIConfig(BaseConfig):
    """Base config extended with the cahootserver web port and
    templating settings"""

    web_port = 8000

    # Configuration for the templating system.
    # NOTE(review): 'lookups' is presumably the template search path and
    # 'modules' the compiled-template cache directory -- confirm against
    # the code that consumes this dict.
    template = {
        'lookups': [
            os.path.dirname(os.path.realpath(__file__)) + '/templates/',
        ],
        # Separate directory per major Python version
        'modules': (
            '/tmp/makocache3/'
            if sys.version_info[0] >= 3
            else '/tmp/makocache/'
        ),
    }
# pylint: disable=method-hidden


class CahootsEncoder(simplejson.JSONEncoder):
    """JSON encoder that can serialize the special types cahoots emits"""

    def default(self, obj):
        """
        Encodes cahoots special types: dates and datetimes become ISO
        format strings, a ParseResult becomes a plain dict, and anything
        else is handed to the parent encoder.
        """
        # datetime is a subclass of date, so this one check covers both
        if isinstance(obj, date):
            return obj.isoformat()

        if isinstance(obj, ParseResult):
            return {
                'type': obj.type,
                'subtype': obj.subtype,
                'confidence': obj.confidence,
                'value': obj.result_value,
                'data': obj.data
            }

        return super(CahootsEncoder, self).default(obj)


def encode(data):
    """Serializes data to indented JSON text using CahootsEncoder"""
    options = {
        'cls': CahootsEncoder,
        'ensure_ascii': False,
        'encoding': 'utf8',
        'indent': 4,
    }
    return simplejson.dumps(data, **options)
16 | line-height: 1.2; 17 | margin: .5em 0; 18 | padding: .5em 0; 19 | } 20 | 21 | h1 { 22 | font-size: 28px; 23 | } 24 | 25 | h2 { 26 | font-size: 24px; 27 | } 28 | 29 | h3 { 30 | font-size: 18px; 31 | } 32 | 33 | h4 { 34 | font-size: 14px; 35 | } 36 | 37 | h5 { 38 | font-size: 12px; 39 | } 40 | 41 | h6 { 42 | font-size: 8px; 43 | } 44 | 45 | a:link, a:visited { 46 | color: #072d66; 47 | } 48 | 49 | a:hover, a:active { 50 | color: #50adbf; 51 | } 52 | 53 | .container:before, 54 | .container:after { 55 | content:""; 56 | display:table; 57 | } 58 | .container:after { 59 | clear:both; 60 | } 61 | /* For IE 6/7 (trigger hasLayout) */ 62 | .container { 63 | zoom:1; 64 | width: 100%; 65 | } 66 | 67 | .container-content { 68 | margin: 0 auto; 69 | width: 800px; 70 | } 71 | 72 | input.bordered, 73 | textarea.bordered { 74 | border-color: #999; 75 | } 76 | 77 | #header { 78 | background: url('/static/dark_leather.png') top left repeat; 79 | padding: 16px 0 0 0; 80 | } 81 | 82 | #header h1 { 83 | clear: none; 84 | color: #DEDEDE; 85 | text-align: left; 86 | font-size: 32px; 87 | margin: 0; 88 | padding: 0; 89 | display: inline-block; 90 | } 91 | 92 | #header h1 a { 93 | text-decoration: none; 94 | color: #DEDEDE; 95 | } 96 | 97 | #header h1 a:hover { 98 | color: #5bc4d9; 99 | } 100 | 101 | #header h2 { 102 | clear: none; 103 | color: #DEDEDE; 104 | text-align: right; 105 | font-size: 18px; 106 | font-weight: normal; 107 | margin: 0 0 0 5px; 108 | padding: 0; 109 | display: inline-block; 110 | } 111 | 112 | #primary { 113 | height: 282px; 114 | padding-top: 65px; 115 | } 116 | 117 | #primary h2 { 118 | color: #5bc4d9; 119 | font-weight: normal; 120 | margin: 0 0 10px; 121 | padding: 0; 122 | } 123 | 124 | #input { 125 | margin: 0; 126 | padding: 0; 127 | width: 100%; 128 | } 129 | 130 | #input #snippet { 131 | color: #444; 132 | width: 788px; 133 | height: 120px; 134 | padding: 5px; 135 | font-weight: 400; 136 | font-size: 16px; 137 | font-family: Oswald, Helvetica, Tahoma, 
Arial, sans-serif; 138 | } 139 | 140 | #input #go { 141 | background-color: #EDEDED; 142 | clear: none; 143 | float: left; 144 | font-size: 16px; 145 | font-weight: bold; 146 | color: #333; 147 | font-family: Oswald, Helvetica, Tahoma, Arial, sans-serif; 148 | display: block; 149 | margin-top: 10px; 150 | cursor: pointer; 151 | padding: 5px 10px; 152 | } 153 | 154 | #input #go:hover { 155 | cursor: pointer; 156 | color: #111; 157 | } 158 | 159 | #multi-line { 160 | clear: none; 161 | float: right; 162 | color: #EDEDED; 163 | font-size: 11px; 164 | font-weight: bold; 165 | margin: 10px 12px 0 0; 166 | } 167 | 168 | 169 | #secondary { 170 | border-top: 5px solid #50adbf; 171 | padding-top: 15px; 172 | } 173 | 174 | #examples h2 { 175 | text-align: center; 176 | margin: 10px 0; 177 | padding: 0; 178 | } 179 | 180 | #examples .example { 181 | float: left; 182 | width: 33%; 183 | overflow: hidden; 184 | } 185 | 186 | #examples .example ul { 187 | margin: 10px 0px 10px 15px; 188 | padding: 0; 189 | } 190 | 191 | #examples .example ul li { 192 | margin: 4px 0; 193 | padding: 0; 194 | } 195 | 196 | #examples .example ul li a { 197 | white-space: nowrap; 198 | } 199 | 200 | div#footer { 201 | background: url('/static/dark_leather.png') top left repeat; 202 | margin-top: 30px; 203 | font-size: 11px; 204 | font-weight: bold; 205 | color: #EDEDED; 206 | padding: 14px 0 0; 207 | border-top: 5px solid #50adbf; 208 | } 209 | 210 | div#footer a { 211 | color: #5bc4d9; 212 | } 213 | 214 | div#footer a:hover { 215 | color: #EDEDED; 216 | } 217 | 218 | div#footer div.ss_copyright { 219 | margin: 0 auto; 220 | width: 399px; 221 | margin-bottom: 10px; 222 | } 223 | 224 | div#footer div.favicon_copyright { 225 | margin: 0 auto; 226 | width: 393px; 227 | margin-bottom: 5px; 228 | } 229 | 230 | div#footer div.background_copyright { 231 | margin: 0 auto; 232 | width: 430px; 233 | margin-bottom: 15px; 234 | } 235 | 
$(function() {
    // Key code reported by evt.which for the Enter/Return key.
    var ENTER_KEY = 13;

    // Submits the snippet form and fully cancels the triggering event
    // (no default action, no bubbling).
    function submitAndCancel(evt) {
        $("#input").submit();
        evt.preventDefault();
        evt.stopPropagation();
        return false;
    }

    // Clicking an example copies its text into the snippet box and submits.
    $("#examples .example ul li a").click(function(evt) {
        var exampleText = $(this).text();
        $("#snippet").val(exampleText);
        return submitAndCancel(evt);
    });

    // Plain Enter submits the form; Shift+Enter / Ctrl+Enter (and every
    // other key) fall through to the browser's default handling.
    $("#snippet").keydown(function(evt) {
        var hasModifier = evt.shiftKey || evt.ctrlKey;
        if (hasModifier || evt.which != ENTER_KEY) {
            return;
        }
        return submitAndCancel(evt);
    });
});
32 |
33 | ${self.secondary()} 34 |
35 |
36 | 37 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /docs/cahoots.confidence.normalizer.rst: -------------------------------------------------------------------------------- 1 | cahoots.confidence.normalizer module 2 | ==================================== 3 | 4 | .. automodule:: cahoots.confidence.normalizer 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.confidence.normalizers.base.rst: -------------------------------------------------------------------------------- 1 | cahoots.confidence.normalizers.base module 2 | ========================================== 3 | 4 | .. automodule:: cahoots.confidence.normalizers.base 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.confidence.normalizers.character.rst: -------------------------------------------------------------------------------- 1 | cahoots.confidence.normalizers.character module 2 | =============================================== 3 | 4 | .. automodule:: cahoots.confidence.normalizers.character 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.confidence.normalizers.date.rst: -------------------------------------------------------------------------------- 1 | cahoots.confidence.normalizers.date module 2 | ========================================== 3 | 4 | .. 
automodule:: cahoots.confidence.normalizers.date 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.confidence.normalizers.equation.rst: -------------------------------------------------------------------------------- 1 | cahoots.confidence.normalizers.equation module 2 | ============================================== 3 | 4 | .. automodule:: cahoots.confidence.normalizers.equation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.confidence.normalizers.number.rst: -------------------------------------------------------------------------------- 1 | cahoots.confidence.normalizers.number module 2 | ============================================ 3 | 4 | .. automodule:: cahoots.confidence.normalizers.number 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.confidence.normalizers.phone.rst: -------------------------------------------------------------------------------- 1 | cahoots.confidence.normalizers.phone module 2 | =========================================== 3 | 4 | .. automodule:: cahoots.confidence.normalizers.phone 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.confidence.normalizers.rst: -------------------------------------------------------------------------------- 1 | cahoots.confidence.normalizers package 2 | ====================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. 
toctree:: 8 | 9 | cahoots.confidence.normalizers.base 10 | cahoots.confidence.normalizers.character 11 | cahoots.confidence.normalizers.date 12 | cahoots.confidence.normalizers.equation 13 | cahoots.confidence.normalizers.number 14 | cahoots.confidence.normalizers.phone 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: cahoots.confidence.normalizers 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/cahoots.confidence.rst: -------------------------------------------------------------------------------- 1 | cahoots.confidence package 2 | ========================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | cahoots.confidence.normalizers 10 | 11 | Submodules 12 | ---------- 13 | 14 | .. toctree:: 15 | 16 | cahoots.confidence.normalizer 17 | 18 | Module contents 19 | --------------- 20 | 21 | .. automodule:: cahoots.confidence 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | -------------------------------------------------------------------------------- /docs/cahoots.config.rst: -------------------------------------------------------------------------------- 1 | cahoots.config module 2 | ===================== 3 | 4 | .. automodule:: cahoots.config 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.data.rst: -------------------------------------------------------------------------------- 1 | cahoots.data module 2 | =================== 3 | 4 | .. automodule:: cahoots.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parser.rst: -------------------------------------------------------------------------------- 1 | cahoots.parser module 2 | ===================== 3 | 4 | .. 
automodule:: cahoots.parser 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.base.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.base module 2 | =========================== 3 | 4 | .. automodule:: cahoots.parsers.base 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.boolean.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.boolean module 2 | ============================== 3 | 4 | .. automodule:: cahoots.parsers.boolean 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.character.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.character module 2 | ================================ 3 | 4 | .. automodule:: cahoots.parsers.character 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.date.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.date module 2 | =========================== 3 | 4 | .. automodule:: cahoots.parsers.date 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.email.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.email module 2 | ============================ 3 | 4 | .. 
automodule:: cahoots.parsers.email 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.equation.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.equation module 2 | =============================== 3 | 4 | .. automodule:: cahoots.parsers.equation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.location.address.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.location.address module 2 | ======================================= 3 | 4 | .. automodule:: cahoots.parsers.location.address 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.location.coordinate.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.location.coordinate module 2 | ========================================== 3 | 4 | .. automodule:: cahoots.parsers.location.coordinate 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.location.landmark.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.location.landmark module 2 | ======================================== 3 | 4 | .. 
automodule:: cahoots.parsers.location.landmark 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.location.postalcode.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.location.postalcode module 2 | ========================================== 3 | 4 | .. automodule:: cahoots.parsers.location.postalcode 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.location.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.location package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | 9 | cahoots.parsers.location.address 10 | cahoots.parsers.location.coordinate 11 | cahoots.parsers.location.landmark 12 | cahoots.parsers.location.postalcode 13 | 14 | Module contents 15 | --------------- 16 | 17 | .. automodule:: cahoots.parsers.location 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.measurement.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.measurement package 2 | =================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: cahoots.parsers.measurement 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.name.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.name module 2 | =========================== 3 | 4 | .. 
automodule:: cahoots.parsers.name 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.number.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.number module 2 | ============================= 3 | 4 | .. automodule:: cahoots.parsers.number 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.phone.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.phone module 2 | ============================ 3 | 4 | .. automodule:: cahoots.parsers.phone 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.programming.bayesian.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.programming.bayesian module 2 | =========================================== 3 | 4 | .. automodule:: cahoots.parsers.programming.bayesian 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.programming.lexer.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.programming.lexer module 2 | ======================================== 3 | 4 | .. 
automodule:: cahoots.parsers.programming.lexer 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.programming.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.programming package 2 | =================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | 9 | cahoots.parsers.programming.bayesian 10 | cahoots.parsers.programming.lexer 11 | 12 | Module contents 13 | --------------- 14 | 15 | .. automodule:: cahoots.parsers.programming 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers package 2 | ======================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | cahoots.parsers.location 10 | cahoots.parsers.measurement 11 | cahoots.parsers.programming 12 | 13 | Submodules 14 | ---------- 15 | 16 | .. toctree:: 17 | 18 | cahoots.parsers.base 19 | cahoots.parsers.boolean 20 | cahoots.parsers.character 21 | cahoots.parsers.date 22 | cahoots.parsers.email 23 | cahoots.parsers.equation 24 | cahoots.parsers.name 25 | cahoots.parsers.number 26 | cahoots.parsers.phone 27 | cahoots.parsers.uri 28 | 29 | Module contents 30 | --------------- 31 | 32 | .. automodule:: cahoots.parsers 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | -------------------------------------------------------------------------------- /docs/cahoots.parsers.uri.rst: -------------------------------------------------------------------------------- 1 | cahoots.parsers.uri module 2 | ========================== 3 | 4 | .. 
automodule:: cahoots.parsers.uri 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.result.rst: -------------------------------------------------------------------------------- 1 | cahoots.result module 2 | ===================== 3 | 4 | .. automodule:: cahoots.result 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/cahoots.rst: -------------------------------------------------------------------------------- 1 | cahoots package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | cahoots.confidence 10 | cahoots.parsers 11 | 12 | Submodules 13 | ---------- 14 | 15 | .. toctree:: 16 | 17 | cahoots.config 18 | cahoots.data 19 | cahoots.parser 20 | cahoots.result 21 | cahoots.util 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: cahoots 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/cahoots.util.rst: -------------------------------------------------------------------------------- 1 | cahoots.util module 2 | =================== 3 | 4 | .. automodule:: cahoots.util 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. cahoots documentation master file, created by 2 | sphinx-quickstart on Wed Apr 15 22:37:46 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to cahoots's documentation! 7 | =================================== 8 | 9 | Contents: 10 | 11 | .. 
__has_parent_dir () {
    # Succeeds (returns 0) when a directory named "$1" exists in the current
    # directory or in any of its ancestors, and fails (returns 1) otherwise.
    # Uses shell builtins only, so we can test for things like .git/.hg
    # without firing up a separate process.
    test -d "$1" && return 0;

    # Keep the cursor local so it does not leak into the interactive shell.
    local current="."
    # A directory is the same file as its own ".." only at the filesystem
    # root, which is where the upward walk stops.
    while [ ! "$current" -ef "$current/.." ]; do
        if [ -d "$current/$1" ]; then
            return 0;
        fi
        current="$current/..";
    done

    return 1;
}

__vcs_name() {
    # Prints a "-[...]" prompt fragment describing the version control
    # system of the current directory; prints nothing outside a checkout.
    if [ -d .svn ]; then
        echo "-[svn]";
    elif __has_parent_dir ".git"; then
        echo "-[$(__git_ps1 'git %s')]";
    elif __has_parent_dir ".hg"; then
        echo "-[hg $(hg branch)]"
    fi
}

# ANSI escape sequences used to colour the prompt.
black="\033[1;30m"
red="\033[1;31m"
green="\033[1;32m"
yellow="\033[1;33m"
dk_blue="\033[1;35m"
pink="\033[1;36m"
lt_blue="\033[1;37m"

bold=""
reset="\033[m"

# Nicely formatted terminal prompt.
# The command substitution is escaped (\$(...)) so that it is stored
# literally in PS1 and re-evaluated every time the prompt is drawn; left
# unescaped inside double quotes it would run only once, while this file is
# being sourced, and the VCS fragment would never update.
export PS1="\n\[$bold\]\[$black\][\[$dk_blue\]\@\[$black\]]-[\[$green\]\u\[$yellow\]@\[$green\]\h\[$black\]]-[\[$pink\]\w\[$black\]]\[\033[0;33m\]\$(__vcs_name) \[\033[00m\]\[$reset\]\n\[$reset\]\$ "

# Append /vagrant to PYTHONPATH. The ${VAR:+...} form avoids a leading ':'
# (an empty entry, i.e. the current directory) when PYTHONPATH is unset.
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}/vagrant"

cd /vagrant
#!/bin/bash
# Provisions a development machine for Cahoots: system packages, Python
# runtime and development requirements, pylint 1.4.3 built from source, and
# the location sqlite database.
# All paths are relative, so this must be run from the repository root.

echo " "
echo "Beginning Provisioning!"

echo " "
echo " [Cahoots] Step 1: Adding APT Repositories and Updating APT"
echo " "
apt-get update

echo " "
echo " [Cahoots] Step 2: Upgrading Base System Packages"
echo " "
apt-get -y upgrade

echo " "
echo " [Cahoots] Step 3: Installing Required System Packages"
echo " "
xargs apt-get install -y --force-yes < setup/requirements.system.txt

echo " "
echo " [Cahoots] Step 4: Installing Required Python Packages"
echo " "
pip install -r setup/requirements.txt

echo " "
echo " [Cahoots] Step 5: Installing Development Python Packages"
echo " "
pip install -r setup/requirements.dev.txt
# Build pylint inside a subshell with every step chained by &&. If the
# download or extraction fails we must not fall through to
# "python setup.py install" (which would run the repository's own setup.py),
# and the subshell guarantees the working directory of this script is left
# untouched for the relative paths used below.
(
    wget https://pypi.python.org/packages/source/p/pylint/pylint-1.4.3.tar.gz \
        && tar -xvf pylint-1.4.3.tar.gz \
        && cd pylint-1.4.3 \
        && python setup.py install
)
rm -rf pylint-1.4.3
rm -f pylint-1.4.3.tar.gz

echo " "
echo " [Cahoots] Step 6: Importing Location Database"
echo " "
data_dir=cahoots/parsers/location/data
cp "$data_dir/location.sqlite.dist" "$data_dir/location.sqlite"
# For every dataset: decompress it (keeping the .bz2), run the matching
# <name>.sql script against the database, then delete the decompressed copy.
# NOTE(review): the .sql scripts presumably import the decompressed file by
# its relative path -- confirm before changing the working directory.
for dataset in city.txt country.csv street_suffix.csv landmark.csv; do
    bzip2 -d -k "$data_dir/$dataset.bz2"
    sqlite3 "$data_dir/location.sqlite" < "$data_dir/${dataset%.*}.sql"
    rm "$data_dir/$dataset"
done

echo " "
echo "Provisioning Complete!"
echo "Ensure you have added this directory to the PYTHONPATH"
echo " "
cahoots/parsers/location/data/landmark.csv 54 | 55 | echo " " 56 | echo "Provisioning Complete!" 57 | echo "Ensure you have added this directory to the PYTHONPATH" 58 | echo " " 59 | -------------------------------------------------------------------------------- /setup/requirements.dev.txt: -------------------------------------------------------------------------------- 1 | nose 2 | coveralls 3 | flake8 4 | mock 5 | pylint -------------------------------------------------------------------------------- /setup/requirements.system.txt: -------------------------------------------------------------------------------- 1 | python-dev 2 | python-pip 3 | sqlite3 -------------------------------------------------------------------------------- /setup/requirements.txt: -------------------------------------------------------------------------------- 1 | flask 2 | mako 3 | dateutils 4 | pyyaml 5 | pygments 6 | pyparsing 7 | phonenumbers 8 | simplejson 9 | SereneRegistry 10 | LatLon23 11 | simplebayes 12 | validate_email -------------------------------------------------------------------------------- /setup/server_provision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo " " 4 | echo "Beginning Provisioning!" 
#!/bin/bash
# Provisions a server for Cahoots: system packages, Python runtime
# requirements and the location sqlite database.
# All paths are relative, so this must be run from the repository root.

echo " "
echo "Beginning Provisioning!"

echo " "
echo " [Cahoots] Step 1: Adding APT Repositories and Updating APT"
echo " "
apt-get update

echo " "
echo " [Cahoots] Step 2: Upgrading Base System Packages"
echo " "
apt-get -y upgrade

echo " "
echo " [Cahoots] Step 3: Installing Required System Packages"
echo " "
xargs apt-get install -y --force-yes < setup/requirements.system.txt

echo " "
echo " [Cahoots] Step 4: Installing Required Python Packages"
echo " "
pip install -r setup/requirements.txt

echo " "
echo " [Cahoots] Step 5: Importing Location Database"
echo " "
data_dir=cahoots/parsers/location/data
cp "$data_dir/location.sqlite.dist" "$data_dir/location.sqlite"
# For every dataset: decompress it (keeping the .bz2), run the matching
# <name>.sql script against the database, then delete the decompressed copy.
for dataset in city.txt country.csv street_suffix.csv landmark.csv; do
    bzip2 -d -k "$data_dir/$dataset.bz2"
    sqlite3 "$data_dir/location.sqlite" < "$data_dir/${dataset%.*}.sql"
    rm "$data_dir/$dataset"
done

echo " "
echo "Provisioning Complete!"
echo "Ensure you have added this directory to the PYTHONPATH"
echo " "
#!/bin/bash
# Provisions the Vagrant development VM for Cahoots: system packages, Python
# runtime and development requirements, the location sqlite database and the
# project's bash prompt defaults for the vagrant user.

cd /vagrant

echo " "
echo "Beginning Provisioning!"
echo "Please ignore any 'stdin' related errors. It's an Ubuntu+Vagrant bug."

echo " "
echo " [Cahoots] Step 1: Adding APT Repositories and Updating APT"
echo " "
apt-get update -o Acquire::ForceIPv4=true

echo " "
echo " [Cahoots] Step 2: Upgrading Base System Packages"
echo " "
apt-get upgrade -y --force-yes -o Acquire::ForceIPv4=true

echo " "
echo " [Cahoots] Step 3: Installing Required System Packages"
echo " "
xargs apt-get install -y --force-yes -o Acquire::ForceIPv4=true < setup/requirements.system.txt

echo " "
echo " [Cahoots] Step 4: Installing Required Python Packages"
echo " "
pip install -r setup/requirements.txt

echo " "
echo " [Cahoots] Step 5: Installing Development Python Packages"
echo " "
pip install -r setup/requirements.dev.txt

echo " "
echo " [Cahoots] Step 6: Importing Location Database"
echo " "
data_dir=cahoots/parsers/location/data
cp "$data_dir/location.sqlite.dist" "$data_dir/location.sqlite"
# For every dataset: decompress it (keeping the .bz2), run the matching
# <name>.sql script against the database, then delete the decompressed copy.
for dataset in city.txt country.csv street_suffix.csv landmark.csv; do
    bzip2 -d -k "$data_dir/$dataset.bz2"
    sqlite3 "$data_dir/location.sqlite" < "$data_dir/${dataset%.*}.sql"
    rm "$data_dir/$dataset"
done

echo " "
echo " [Cahoots] Step 7: Setting Up Bash Defaults"
# Append the project's bash defaults only once. Re-running the provisioner
# used to append another full copy each time. The __vcs_name function
# defined in setup/bashrc serves as the "already installed" marker.
if ! grep -qs '__vcs_name' /home/vagrant/.bashrc; then
    cat /vagrant/setup/bashrc >> /home/vagrant/.bashrc
fi

echo " "
echo " "
echo "Provisioning Complete!"
echo " "
echo "Instructions for web client:"
echo "1) Type 'vagrant ssh' to connect to your vm."
echo "2) Type './cahootserver/server.py' to start Cahoots."
echo " "
echo "Instructions for unit/pylint/flake8 tests:"
echo "1) Type 'vagrant ssh' to connect to your vm."
echo "2) Type './tests/build.sh' to execute the test suite."
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | -------------------------------------------------------------------------------- /tests/build.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Runs the Cahoots checks in order: unit tests with coverage (nosetests),
4 | # flake8, then pylint. Each step prints the exit code of the tool it ran.
5 | 
6 | echo
7 | echo
8 | echo " [Cahoots] Step 1: Executing Unit Tests."
9 | echo
10 | nosetests tests/test.py --with-coverage --cover-package=cahoots --cover-min-percentage 100
11 | # Capture the status before cleaning up; after "rm" runs, $? would describe
12 | # the cleanup instead of the test run.
13 | unit_test_status=$?
14 | rm -f .coverage*
15 | echo -e "\nExit Code:" "$unit_test_status"
16 | 
17 | echo
18 | echo " [Cahoots] Step 2: Executing pep8 and pyflakes Tests (flake8)."
19 | echo
20 | flake8 cahoots tests cahootserver
21 | echo "Exit Code:" $?
22 | 
23 | echo
24 | echo " [Cahoots] Step 3: Executing pylint Tests."
25 | echo
26 | pylint cahoots tests cahootserver --reports=no
27 | echo "Exit Code:" $?
28 | -------------------------------------------------------------------------------- /tests/confidence/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE.
23 | """ 24 | -------------------------------------------------------------------------------- /tests/confidence/normalizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.confidence.normalizer import HierarchicalNormalizerChain 26 | from cahoots.confidence.normalizers.base import BaseNormalizer 27 | from cahoots.result import ParseResult 28 | from tests.config import TestConfig 29 | import unittest 30 | 31 | 32 | class NormalizerStub(BaseNormalizer): 33 | 34 | @staticmethod 35 | def test(_, __): 36 | return True 37 | 38 | @staticmethod 39 | def normalize(results): 40 | return results 41 | 42 | 43 | class HierarchicalNormalizerChainTests(unittest.TestCase): 44 | 45 | def test_normalizer_normalizes(self): 46 | res = [ 47 | ParseResult('Test', 'Test', 100), 48 | ParseResult('Test', 'Test', 0) 49 | ] 50 | 51 | conf = TestConfig() 52 | conf.enabled_confidence_normalizers.append(NormalizerStub) 53 | hnc = HierarchicalNormalizerChain(conf, [], []) 54 | results = hnc.normalize(res) 55 | 56 | self.assertEqual(1, len(results)) 57 | self.assertIsInstance(results[0], ParseResult) 58 | -------------------------------------------------------------------------------- /tests/confidence/normalizers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | -------------------------------------------------------------------------------- /tests/confidence/normalizers/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.confidence.normalizers.base import BaseNormalizer 26 | import unittest 27 | 28 | 29 | class BaseNormalizerTests(unittest.TestCase): 30 | 31 | def test_base_raises_expected_Errors(self): 32 | self.assertRaises(NotImplementedError, BaseNormalizer.test, [], []) 33 | self.assertRaises(NotImplementedError, BaseNormalizer.normalize, []) 34 | -------------------------------------------------------------------------------- /tests/confidence/normalizers/character.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.confidence.normalizers.character import CharacterWithoutBoolean 26 | from cahoots.result import ParseResult 27 | import unittest 28 | 29 | 30 | class CharacterWithoutBooleanTests(unittest.TestCase): 31 | 32 | def test_test(self): 33 | self.assertFalse(CharacterWithoutBoolean.test( 34 | ['Character', 'Boolean'], [] 35 | )) 36 | self.assertTrue(CharacterWithoutBoolean.test( 37 | ['Character', 'Postal Code'], [] 38 | )) 39 | 40 | def test_normalizer(self): 41 | char_result = ParseResult('Character', None, 25) 42 | pc_result = ParseResult('Postal Code', None, 80) 43 | 44 | results = CharacterWithoutBoolean.normalize([char_result, pc_result]) 45 | 46 | count = 0 47 | for res in results: 48 | if res.type == 'Character': 49 | count += 1 50 | self.assertEqual(res.confidence, 100) 51 | elif res.type == "Postal Code": 52 | count += 1 53 | self.assertEqual(res.confidence, 80) 54 | 55 | self.assertEqual(count, len(results)) 56 | -------------------------------------------------------------------------------- /tests/confidence/normalizers/date.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.confidence.normalizers.date import DateWithPostalCode 26 | from cahoots.result import ParseResult 27 | import unittest 28 | 29 | 30 | class DateWithPostalCodeTests(unittest.TestCase): 31 | 32 | def test_test(self): 33 | self.assertFalse(DateWithPostalCode.test(['Date', 'Number'], [])) 34 | self.assertTrue(DateWithPostalCode.test(['Date', 'Postal Code'], [])) 35 | 36 | def test_normalizer_with_pc_conf_over_70(self): 37 | date_result = ParseResult('Date', None, 10) 38 | pc_result = ParseResult('Postal Code', None, 80) 39 | 40 | results = DateWithPostalCode.normalize([date_result, pc_result]) 41 | 42 | count = 0 43 | for res in results: 44 | if res.type == 'Date': 45 | count += 1 46 | self.assertEqual(res.confidence, 70) 47 | elif res.type == "Postal Code": 48 | count += 1 49 | 50 | self.assertEqual(count, len(results)) 51 | 52 | def test_normalizer_with_pc_conf_under_70(self): 53 | date_result = ParseResult('Date', None, 10) 54 | pc_result = ParseResult('Postal Code', None, 40) 55 | 56 | results = DateWithPostalCode.normalize([date_result, pc_result]) 57 | 58 | count = 0 59 | for res in results: 60 | if res.type == 'Date': 61 | count += 1 62 | self.assertEqual(res.confidence, 44) 63 | elif res.type == "Postal Code": 64 | count += 1 65 | 66 | self.assertEqual(count, len(results)) 67 | -------------------------------------------------------------------------------- 
/tests/confidence/normalizers/equation.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.confidence.normalizers.equation import \ 26 | EquationWithPhonePostalCode 27 | from cahoots.result import ParseResult 28 | import unittest 29 | 30 | 31 | class EquationWithPhonePostalCodeTests(unittest.TestCase): 32 | 33 | def test_test(self): 34 | self.assertFalse(EquationWithPhonePostalCode.test( 35 | ['Equation', 'Boolean'], [] 36 | )) 37 | self.assertTrue(EquationWithPhonePostalCode.test( 38 | ['Equation', 'Postal Code'], [] 39 | )) 40 | 41 | def test_normalizer(self): 42 | eq_result = ParseResult('Equation', None, 90) 43 | pc_result = ParseResult('Postal Code', None, 80) 44 | 45 | results = EquationWithPhonePostalCode.normalize([eq_result, pc_result]) 46 | 47 | count = 0 48 | for res in results: 49 | if res.type == 'Equation': 50 | count += 1 51 | self.assertEqual(res.confidence, 75) 52 | elif res.type == "Postal Code": 53 | count += 1 54 | self.assertEqual(res.confidence, 80) 55 | 56 | self.assertEqual(count, len(results)) 57 | -------------------------------------------------------------------------------- /tests/confidence/normalizers/number.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.confidence.normalizers.number import \ 26 | NumberWithNonNumbers, IntOctWithPhoneDatePostalCode 27 | from cahoots.result import ParseResult 28 | import unittest 29 | 30 | 31 | class NumberWithNonNumbersTests(unittest.TestCase): 32 | 33 | def test_test(self): 34 | self.assertTrue( 35 | NumberWithNonNumbers.test(['Date', 'Number'], []) 36 | ) 37 | self.assertFalse( 38 | NumberWithNonNumbers.test(['Date', 'Postal Code'], []) 39 | ) 40 | 41 | def test_normalize(self): 42 | result = ParseResult('Number', None, 70) 43 | 44 | results = NumberWithNonNumbers.normalize([result]) 45 | 46 | count = 0 47 | for res in results: 48 | count += 1 49 | self.assertEqual(res.confidence, 35) 50 | 51 | self.assertEqual(count, len(results)) 52 | 53 | 54 | class IntOctWithPhoneDatePostalCodeTests(unittest.TestCase): 55 | 56 | def test_test(self): 57 | self.assertFalse(IntOctWithPhoneDatePostalCode.test( 58 | [], 59 | ['Number', 'Integer', 'Octal'] 60 | )) 61 | self.assertTrue(IntOctWithPhoneDatePostalCode.test( 62 | [], 63 | ['Integer', 'Octal', 'Date'] 64 | )) 65 | 66 | def test_normalize(self): 67 | results = [ 68 | ParseResult('Number', 'Integer', 100), 69 | ParseResult('Number', 'Octal', 25), 70 | ParseResult('Date', 'Date', 50) 71 | ] 72 | 73 | results = IntOctWithPhoneDatePostalCode.normalize(results) 74 | 75 | count = 0 76 | for res in results: 77 | count += 1 78 | if res.subtype == 
'Integer': 79 | self.assertEqual(10, res.confidence) 80 | elif res.subtype == 'Octal': 81 | self.assertEqual(5, res.confidence) 82 | else: 83 | self.assertEqual(50, res.confidence) 84 | 85 | self.assertEqual(count, 3) 86 | -------------------------------------------------------------------------------- /tests/confidence/normalizers/phone.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.confidence.normalizers.phone import PhoneWithUri 26 | from cahoots.result import ParseResult 27 | import unittest 28 | 29 | 30 | class PhoneWithUriTests(unittest.TestCase): 31 | 32 | def test_test(self): 33 | self.assertFalse(PhoneWithUri.test( 34 | ['Phone', 'Boolean'], [] 35 | )) 36 | self.assertTrue(PhoneWithUri.test( 37 | ['Phone', 'URI'], [] 38 | )) 39 | 40 | def test_normalizer(self): 41 | eq_result = ParseResult('Phone', None, 90) 42 | uri_result = ParseResult('URI', None, 80) 43 | 44 | results = PhoneWithUri.normalize([eq_result, uri_result]) 45 | 46 | count = 0 47 | for res in results: 48 | if res.type == 'Phone': 49 | count += 1 50 | self.assertEqual(res.confidence, 65) 51 | elif res.type == "URI": 52 | count += 1 53 | self.assertEqual(res.confidence, 80) 54 | 55 | self.assertEqual(count, len(results)) 56 | -------------------------------------------------------------------------------- /tests/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """
24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring
25 | from cahoots.parsers.location.postalcode import PostalCodeParser
26 | from cahoots.parsers.location.landmark import LandmarkParser
27 | from cahoots.parsers.location.coordinate import CoordinateParser
28 | from cahoots.parsers.location.address import AddressParser
29 | from cahoots.parsers.uri import URIParser
30 | from cahoots.parsers.phone import PhoneParser
31 | from cahoots.parsers.number import NumberParser
32 | from cahoots.parsers.name import NameParser
33 | from cahoots.parsers.measurement import MeasurementParser
34 | from cahoots.parsers.equation import EquationParser
35 | from cahoots.parsers.email import EmailParser
36 | from cahoots.parsers.date import DateParser
37 | from cahoots.parsers.character import CharacterParser
38 | from cahoots.parsers.boolean import BooleanParser
39 | from cahoots.config import BaseConfig
40 | 
41 | 
42 | class TestConfig(BaseConfig):
43 |     """Configuration shared by the test suite.
44 | 
45 |     Both attributes are class-level lists, so every TestConfig instance
46 |     sees the same list objects unless a test rebinds them on the instance.
47 |     """
48 | 
49 |     # Parser modules enabled for tests.
50 |     # NOTE(review): whether the order of this list matters is not visible
51 |     # here; keep it as-is unless confirmed against cahoots.parser.
52 |     enabled_modules = [
53 |         URIParser,
54 |         PostalCodeParser,
55 |         PhoneParser,
56 |         NumberParser,
57 |         NameParser,
58 |         MeasurementParser,
59 |         LandmarkParser,
60 |         EquationParser,
61 |         EmailParser,
62 |         DateParser,
63 |         CoordinateParser,
64 |         CharacterParser,
65 |         BooleanParser,
66 |         AddressParser,
67 |     ]
68 | 
69 |     # No confidence normalizers are enabled by default.
70 |     enabled_confidence_normalizers = [
71 |     ]
72 | -------------------------------------------------------------------------------- /tests/data.py:
-------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from SereneRegistry import registry 26 | from cahoots.data import DataHandler 27 | import unittest 28 | 29 | 30 | class DataHandlerTests(unittest.TestCase): 31 | 32 | def tearDown(self): 33 | registry.flush() 34 | 35 | def test_get_prepositions(self): 36 | preps = DataHandler().get_prepositions() 37 | 38 | self.assertIsInstance(preps, list) 39 | self.assertEqual(70, len(preps)) 40 | 41 | preps_cached = DataHandler().get_prepositions() 42 | 43 | self.assertEqual(preps, preps_cached) 44 | -------------------------------------------------------------------------------- /tests/parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """
24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring
25 | from cahoots.config import BaseConfig
26 | from cahoots.parser import ParserThread, CahootsParser
27 | from cahoots.parsers.base import BaseParser
28 | from cahoots.result import ParseResult
29 | from tests.config import TestConfig
30 | import datetime
31 | import mock
32 | import unittest
33 | 
34 | 
35 | class FakeModule(BaseParser):
36 |     """Parser stub of type "Fake" that echoes its input as one result."""
37 | 
38 |     # Set to True by bootstrap(); tests reset it to False after checking it.
39 |     bootstrappingComplete = False
40 | 
41 |     def __init__(self, config):
42 |         BaseParser.__init__(self, config, "Fake")
43 | 
44 |     def parse(self, data):
45 |         # Yields exactly one result: subtype "Subtype", confidence 200.
46 |         yield self.result("Subtype", 200, data)
47 | 
48 |     @staticmethod
49 |     def bootstrap(config):
50 |         # Only records that bootstrapping happened; config is unused.
51 |         FakeModule.bootstrappingComplete = True
52 | 
53 | 
54 | class ParserTestConfig(BaseConfig):
55 |     """Debug configuration that enables FakeModule and no normalizers."""
56 | 
57 |     debug = True
58 | 
59 |     enabled_modules = [
60 |         FakeModule,
61 |     ]
62 | 
63 |     enabled_confidence_normalizers = [
64 |     ]
65 | 
66 | 
67 | class parserThreadTests(unittest.TestCase):
68 |     """Unit tests for ParserThread, driven by FakeModule."""
69 | 
70 |     parserThread = None
71 | 
72 |     def setUp(self):
73 |         # Note: the TestConfig class itself is passed, not an instance.
74 |         self.parserThread = ParserThread(TestConfig, FakeModule, 'data_string')
75 | 
76 |     def test_parserThreadYieldsResultAsExpected(self):
77 |         # Run the thread to completion before inspecting its results.
78 |         self.parserThread.start()
79 |         self.parserThread.join()
80 | 
81 |         # NOTE(review): if results were empty this loop would assert nothing.
82 |         for result in self.parserThread.results:
83 |             self.assertIsInstance(result, ParseResult)
84 |             self.assertEqual('Fake', result.type)
85 |             self.assertEqual('Subtype', result.subtype)
86 |             self.assertEqual(200, result.confidence)
87 |             self.assertEqual('data_string', result.result_value)
88 | 
89 | 
90 | class FakeDate(datetime.datetime):
91 |     # Stand-in patched over datetime.datetime in
92 |     # test_parserReturnsExpectedParserResult, which replaces its now().
93 |     # pylint: disable=arguments-differ
94 |     def __new__(cls):
95 |         return datetime.datetime.__new__(datetime.datetime)
96 | 
97 | 
98 | class CahootsParserTests(unittest.TestCase):
99 |     """Unit tests for CahootsParser construction and parse() output."""
100 | 
101 |     def test_bootstrapSetsUpParserProperly(self):
102 |         CahootsParser(ParserTestConfig, True)
103 |         self.assertTrue(FakeModule.bootstrappingComplete)
104 |         # Reset the class-level flag so it does not carry over to other tests.
105 |         FakeModule.bootstrappingComplete = False
106 | 
107 |     def test_parserCreatesInstanceOfBaseConfig(self):
108 |         # With no arguments the parser must still end up with a BaseConfig.
109 |         parser = CahootsParser()
110 |         self.assertIsInstance(parser.config, BaseConfig)
111 | 
112 |     def test_parserInstantiatesBaseConfig(self):
113 |         parser = CahootsParser(BaseConfig())
114 |         self.assertIsInstance(parser.config, BaseConfig)
115 | 
116 |     @mock.patch('datetime.datetime', FakeDate)
117 |     def test_parserReturnsExpectedParserResult(self):
118 |         # Stub now() so the 'date' entry of the result is predictable.
119 |         FakeDate.now = classmethod(lambda cls: 'thetimeisnow')
120 |         parser = CahootsParser(ParserTestConfig)
121 |         result = parser.parse('data_string')
122 | 
123 |         # The result is expected to have exactly five top-level entries.
124 |         self.assertEqual(5, len(result))
125 |         self.assertEqual('data_string', result['query'])
126 |         self.assertEqual('thetimeisnow', result['date'])
127 |         self.assertIsInstance(result['top'], ParseResult)
128 |         self.assertEqual(1, result['results']['count'])
129 |         self.assertEqual(['Fake'], result['results']['types'])
130 |         self.assertEqual(1, len(result['results']['matches']))
131 |         self.assertIsInstance(result['results']['matches'][0], ParseResult)
132 | -------------------------------------------------------------------------------- /tests/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software.
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | -------------------------------------------------------------------------------- /tests/parsers/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.parsers.base import BaseParser 26 | from cahoots.result import ParseResult 27 | from tests.config import TestConfig 28 | import unittest 29 | 30 | 31 | class BaseParserTests(unittest.TestCase): 32 | """Unit Testing of the BaseParser""" 33 | 34 | bp = None 35 | 36 | def setUp(self): 37 | BaseParser.bootstrap(TestConfig()) 38 | self.bp = BaseParser(TestConfig()) 39 | 40 | def tearDown(self): 41 | self.bp = None 42 | 43 | def test_parse(self): 44 | self.assertRaises(NotImplementedError, self.bp.parse, "") 45 | 46 | def test_result(self): 47 | self.assertIsInstance(self.bp.result(), ParseResult) 48 | -------------------------------------------------------------------------------- /tests/parsers/boolean.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.parsers.boolean import BooleanParser 26 | from cahoots.result import ParseResult 27 | from tests.config import TestConfig 28 | import unittest 29 | 30 | 31 | class BooleanParserTests(unittest.TestCase): 32 | """Unit Testing of the BooleanParser""" 33 | 34 | bp = None 35 | 36 | trueValues = ["true", "yes", "yep", "yup", "1", "t", "one"] 37 | falseValues = ["false", "no", "nope", "0", "f", "zero"] 38 | junkValues = ["asdfasdf", "burp", "2"] 39 | 40 | def setUp(self): 41 | self.bp = BooleanParser(TestConfig()) 42 | 43 | def tearDown(self): 44 | self.bp = None 45 | 46 | def test_is_true(self): 47 | 48 | for testValue in self.trueValues: 49 | self.assertTrue(self.bp.is_true(testValue)) 50 | 51 | for testValue in self.junkValues: 52 | self.assertFalse(self.bp.is_true(testValue)) 53 | 54 | for testValue in self.falseValues: 55 | self.assertFalse(self.bp.is_true(testValue)) 56 | 57 | def test_is_false(self): 58 | 59 | for testValue in self.trueValues: 60 | self.assertFalse(self.bp.is_false(testValue)) 61 | 62 | for testValue in self.junkValues: 63 | self.assertFalse(self.bp.is_false(testValue)) 64 | 65 | for testValue in self.falseValues: 66 | self.assertTrue(self.bp.is_false(testValue)) 67 | 68 | def test_parseLongStringYieldsNothing(self): 69 | resultTest = None 70 | 71 | for result in self.bp.parse("LookALongString"): 72 | resultTest = result 73 | 74 | self.assertIsNone(resultTest) 75 | 76 | def test_parseTrueValuesYieldsExpectedConfidence(self): 77 | valueConfidence = [("true", 100), 78 | ("yes", 100), 79 | ("yep", 75), 80 | ("yup", 75), 81 | ("1", 50), 82 | ("t", 50), 83 | ("one", 50)] 84 | 
85 | for value, confidence in valueConfidence: 86 | for result in self.bp.parse(value): 87 | self.assertIsInstance(result, ParseResult) 88 | self.assertEqual(result.confidence, confidence) 89 | self.assertTrue(result.result_value) 90 | 91 | def test_parseFalseValuesYieldsExpectedConfidence(self): 92 | valueConfidence = [("false", 100), 93 | ("no", 100), 94 | ("nope", 75), 95 | ("0", 50), 96 | ("f", 50), 97 | ("zero", 50)] 98 | 99 | for value, confidence in valueConfidence: 100 | for result in self.bp.parse(value): 101 | self.assertIsInstance(result, ParseResult) 102 | self.assertEqual(result.confidence, confidence) 103 | self.assertFalse(result.result_value) 104 | 105 | def test_parseTrueValuesYieldsNothing(self): 106 | resultTest = None 107 | 108 | for value in self.junkValues: 109 | for result in self.bp.parse(value): 110 | resultTest = result 111 | 112 | self.assertIsNone(resultTest) 113 | -------------------------------------------------------------------------------- /tests/parsers/character.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.parsers.character import CharacterParser 26 | from tests.config import TestConfig 27 | import unittest 28 | 29 | 30 | class CharacterParserTests(unittest.TestCase): 31 | """Unit Testing of the CharacterParser""" 32 | 33 | cp = None 34 | 35 | def setUp(self): 36 | self.cp = CharacterParser(TestConfig()) 37 | 38 | def tearDown(self): 39 | self.cp = None 40 | 41 | def test_is_letter(self): 42 | self.assertTrue(self.cp.is_letter("a")) 43 | self.assertFalse(self.cp.is_letter("1")) 44 | self.assertFalse(self.cp.is_letter(".")) 45 | self.assertFalse(self.cp.is_letter(" ")) 46 | self.assertFalse(self.cp.is_letter("asdf")) 47 | 48 | def test_is_punctuation(self): 49 | self.assertTrue(self.cp.is_punctuation(".")) 50 | self.assertFalse(self.cp.is_punctuation("1")) 51 | self.assertFalse(self.cp.is_punctuation("a")) 52 | self.assertFalse(self.cp.is_punctuation(" ")) 53 | self.assertFalse(self.cp.is_punctuation("asdf")) 54 | 55 | def test_is_whitespace(self): 56 | self.assertTrue(self.cp.is_whitespace(" ")) 57 | self.assertFalse(self.cp.is_whitespace("1")) 58 | self.assertFalse(self.cp.is_whitespace("a")) 59 | self.assertFalse(self.cp.is_whitespace(".")) 60 | self.assertFalse(self.cp.is_whitespace("asdf")) 61 | 62 | def test_parseReturnsNothingWithStringLongerThanOneCharacter(self): 63 | count = 0 64 | for _ in self.cp.parse("asdf"): 65 | count += 1 66 | self.assertEqual(count, 0) 67 
| 68 | def test_parseNonAsciiCharacterReturnNone(self): 69 | count = 0 70 | # pylint: disable=anomalous-unicode-escape-in-string 71 | for _ in self.cp.parse('\u0080'): 72 | count += 1 73 | self.assertEqual(count, 0) 74 | 75 | def test_parseLetterYieldsProperResult(self): 76 | count = 0 77 | for result in self.cp.parse('a'): 78 | count += 1 79 | self.assertEqual(result.subtype, 'Letter') 80 | self.assertEqual(result.data, {'char-code': 97}) 81 | self.assertEqual(result.confidence, 25) 82 | self.assertEqual(count, 1) 83 | 84 | def test_parsePunctuationYieldsProperResult(self): 85 | count = 0 86 | for result in self.cp.parse('.'): 87 | count += 1 88 | self.assertEqual(result.subtype, 'Punctuation') 89 | self.assertEqual(result.data, {'char-code': 46}) 90 | self.assertEqual(result.confidence, 25) 91 | self.assertEqual(count, 1) 92 | 93 | def test_parseWhitespaceYieldsProperResult(self): 94 | count = 0 95 | for result in self.cp.parse(' '): 96 | count += 1 97 | self.assertEqual(result.subtype, 'Whitespace') 98 | self.assertEqual(result.data, {'char-code': 32}) 99 | self.assertEqual(result.confidence, 25) 100 | self.assertEqual(count, 1) 101 | 102 | def test_parseBooleanLetterYieldsLowerConfidence(self): 103 | count = 0 104 | for result in self.cp.parse('T'): 105 | count += 1 106 | self.assertEqual(result.subtype, 'Letter') 107 | self.assertEqual(result.data, {'char-code': 84}) 108 | self.assertEqual(result.confidence, 25) 109 | self.assertEqual(count, 1) 110 | -------------------------------------------------------------------------------- /tests/parsers/email.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | 
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.parsers.email import EmailParser 26 | from tests.config import TestConfig 27 | import unittest 28 | 29 | 30 | class EmailParserTests(unittest.TestCase): 31 | """Unit Testing of the EmailParser""" 32 | 33 | ep = None 34 | 35 | def setUp(self): 36 | EmailParser.bootstrap(TestConfig()) 37 | self.ep = EmailParser(TestConfig()) 38 | 39 | def tearDown(self): 40 | self.ep = None 41 | 42 | def test_parseWithNoAtSymbolReturnsNothing(self): 43 | count = 0 44 | for _ in self.ep.parse("Foo"): 45 | count += 1 46 | self.assertEqual(0, count) 47 | 48 | def test_parseDeterminesEmailValidityProperly(self): 49 | count = 0 50 | for result in self.ep.parse("foo@bar.com"): 51 | count += 1 52 | self.assertEqual("Email Address", result.subtype) 53 | self.assertEqual(100, result.confidence) 54 | self.assertEqual(1, count) 55 | -------------------------------------------------------------------------------- /tests/parsers/location/landmark.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT 
License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.parsers.location.landmark import LandmarkParser 26 | from tests.parsers.location import SQLite3Mock 27 | from SereneRegistry import registry 28 | from tests.config import TestConfig 29 | import unittest 30 | import sqlite3 31 | import mock 32 | 33 | 34 | class LandmarkParserTests(unittest.TestCase): 35 | 36 | lp = None 37 | 38 | def setUp(self): 39 | LandmarkParser.bootstrap(TestConfig()) 40 | self.lp = LandmarkParser(TestConfig()) 41 | 42 | def tearDown(self): 43 | SQLite3Mock.reset() 44 | registry.flush() 45 | self.lp = None 46 | 47 | def test_parse_longer_than_75_chars(self): 48 | test_string = 'asdfgasdfgasdfgasdfgasdfgasdfgasdfgasdfgasdfgasdfg' + \ 49 | 'asdfgasdfgasdfgasdfgasdfgasdfgasdfgasdfg' 50 | 51 | result = self.lp.parse(test_string) 52 | 53 | count = 1 54 | for _ in result: 55 | count += 2 56 | 57 | self.assertEqual(1, count) 58 | 59 | def test_parse_with_invalid_words(self): 60 | result = self.lp.parse('abc123 def456') 61 | 62 | count = 1 63 | for _ in result: 64 | count += 2 65 | 66 | self.assertEqual(1, count) 67 | 68 | def test_parse_with_invalid_characters(self): 69 | result = self.lp.parse('The E|n|d') 70 | 71 | count = 1 72 | for _ in result: 73 | count += 2 74 | 75 | self.assertEqual(1, count) 76 | 77 | @mock.patch('sqlite3.connect', SQLite3Mock.connect) 78 | def test_parse_with_sqlite3_error(self): 79 | SQLite3Mock.fetchall_returns = [sqlite3.Error('Error')] 80 | result = self.lp.parse('The End Of Time') 81 | 82 | count = 0 83 | for _ in result: 84 | count += 1 85 | 86 | self.assertEqual(0, count) 87 | 88 | self.assertEqual( 89 | SQLite3Mock.execute_calls, 90 | [ 91 | ('PRAGMA temp_store = 2', None), 92 | ( 93 | 'SELECT * FROM landmark WHERE resource like ?', 94 | ('End Of Time%',) 95 | ) 96 | ] 97 | ) 98 | 99 | @mock.patch('sqlite3.connect', SQLite3Mock.connect) 100 | def test_parse(self): 101 | SQLite3Mock.fetchall_returns = [ 102 | 
[('us', 'united states')], 103 | [ 104 | ('a', 'b', 'c', 'd', 'e', 'f'), 105 | ] 106 | ] 107 | 108 | results = self.lp.parse('The End Of Time') 109 | 110 | count = 0 111 | for result in results: 112 | self.assertEqual(result.result_value, [ 113 | { 114 | "city": "c", 115 | "resource": "a", 116 | "country": { 117 | "abbreviation": "US", 118 | "name": "United States" 119 | }, 120 | "county": "d", 121 | "state": "e", 122 | "address": "b" 123 | } 124 | ]) 125 | self.assertEqual(result.confidence, 95) 126 | count += 1 127 | self.assertEqual( 128 | SQLite3Mock.execute_calls, 129 | [ 130 | ('PRAGMA temp_store = 2', None), 131 | ( 132 | 'SELECT * FROM landmark WHERE resource like ?', 133 | ('End Of Time%',) 134 | ), 135 | ('SELECT * FROM country WHERE abbreviation = ?', ('f',)) 136 | ] 137 | ) 138 | self.assertEqual(1, count) 139 | -------------------------------------------------------------------------------- /tests/parsers/name.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | The MIT License (MIT) 4 | 5 | Copyright (c) Serenity Software, LLC 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | """ 25 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 26 | from cahoots.parsers.name import NameParser 27 | from tests.config import TestConfig 28 | from SereneRegistry import registry 29 | import unittest 30 | 31 | 32 | class NameParserTests(unittest.TestCase): 33 | """Unit Testing of the NameParser""" 34 | 35 | np = None 36 | 37 | def setUp(self): 38 | NameParser.bootstrap(TestConfig()) 39 | self.np = NameParser(TestConfig()) 40 | 41 | def tearDown(self): 42 | registry.flush() 43 | self.np = None 44 | 45 | def test_basic_validation(self): 46 | 47 | self.assertFalse(self.np.basic_validation(['foo', 'Bar', '2nd'])) 48 | self.assertFalse(self.np.basic_validation(['Foo', 'Bar', 'a123'])) 49 | self.assertFalse(self.np.basic_validation(['Foo', 'Bar', '$123'])) 50 | self.assertFalse(self.np.basic_validation(['Foo', 'Bar', '123'])) 51 | 52 | self.assertTrue(self.np.basic_validation(['Foo', 'Bar', '2nd'])) 53 | 54 | def test_is_prefix(self): 55 | 56 | self.assertFalse(self.np.is_prefix('foo')) 57 | 58 | self.assertTrue(self.np.is_prefix('Mr')) 59 | 60 | def test_is_suffix(self): 61 | 62 | self.assertFalse(self.np.is_suffix('foo')) 63 | 64 | self.assertTrue(self.np.is_suffix('Sr')) 65 | self.assertTrue(self.np.is_suffix('IV')) 66 | 67 | def test_is_initial(self): 68 | 69 | self.assertFalse(self.np.is_initial('Hello')) 70 | self.assertFalse(self.np.is_initial('1')) 71 | self.assertFalse(self.np.is_initial('1.')) 72 | self.assertFalse(self.np.is_initial('1,')) 73 | self.assertFalse(self.np.is_initial('A,')) 74 | 75 | self.assertTrue(self.np.is_initial('Q')) 76 | self.assertTrue(self.np.is_initial('Q.')) 77 | 78 | def test_parseWithNoUpperCaseLettersYieldsNothing(self): 
79 | count = 0 80 | for _ in self.np.parse('foo'): 81 | count += 1 82 | self.assertEqual(count, 0) 83 | 84 | def test_parseWithGreaterThanTenWordsYieldsNothing(self): 85 | count = 0 86 | for _ in self.np.parse( 87 | 'Foo bar baz buns barf blarg bleh bler blue sner sneh snaf.' 88 | ): 89 | count += 1 90 | self.assertEqual(count, 0) 91 | 92 | def test_parseWithNonBasicValidatedAttributesYieldsNothing(self): 93 | count = 0 94 | for _ in self.np.parse('Foo bar The Third'): 95 | count += 1 96 | self.assertEqual(count, 0) 97 | 98 | def test_parseYieldsExpectedConfidenceWithFiveWordName(self): 99 | count = 0 100 | for result in self.np.parse('Dr. Foo Bar Bleh Bar Sr.'): 101 | self.assertEqual(result.confidence, 52) 102 | self.assertEqual(result.subtype, 'Name') 103 | count += 1 104 | self.assertEqual(count, 1) 105 | 106 | def test_parseYieldsExpectedConfidenceWithThreeWordName(self): 107 | count = 0 108 | for result in self.np.parse('Dr. Foo Q. Ben Sr.'): 109 | self.assertEqual(result.confidence, 95) 110 | self.assertEqual(result.subtype, 'Name') 111 | count += 1 112 | self.assertEqual(count, 1) 113 | 114 | def test_parseYieldsNothingWithOneWordName(self): 115 | count = 0 116 | for _ in self.np.parse('Foo'): 117 | count += 1 118 | self.assertEqual(count, 0) 119 | 120 | def test_parseYieldsNothingWithNonPrintableCharacters(self): 121 | count = 0 122 | for _ in self.np.parse('40.244° N 79.123° W'): 123 | count += 1 124 | self.assertEqual(count, 0) 125 | -------------------------------------------------------------------------------- /tests/parsers/programming/bayesian.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.parsers.programming.bayesian import \ 26 | ProgrammingBayesianClassifier 27 | from SereneRegistry import registry 28 | from tests.config import TestConfig 29 | from inspect import ismethod, isfunction 30 | import mock 31 | import unittest 32 | 33 | 34 | class SimpleBayesStub(object): 35 | 36 | Redis = None 37 | Tokenizer = None 38 | Prefix = None 39 | 40 | Languages = {} 41 | 42 | Flushed = False 43 | 44 | data_string = None 45 | 46 | def __init__(self, tokenizer=None): 47 | SimpleBayesStub.Tokenizer = tokenizer 48 | 49 | @classmethod 50 | def train(cls, language, sample): 51 | SimpleBayesStub.Languages[language] = sample 52 | 53 | @classmethod 54 | def score(cls, data_string): 55 | SimpleBayesStub.data_string = data_string 56 | return 'FooBar' 57 | 58 | 59 | class ZipFileStub(object): 60 | 61 | called = [] 62 | 63 | def __init__(self, filename, filemode): 64 | filename = filename.split('/').pop() 65 | ZipFileStub.called.append('init-' + filename + '-' + filemode) 66 | 67 | @classmethod 68 | def namelist(cls): 69 | 
ZipFileStub.called.append('namelist') 70 | return ['foo.def', 'bar.def'] 71 | 72 | @classmethod 73 | def read(cls, filename): 74 | ZipFileStub.called.append('read-' + filename) 75 | return filename + '-text' 76 | 77 | 78 | class ProgrammingBayesianClassifierTests(unittest.TestCase): 79 | 80 | def setUp(self): 81 | registry.set('PP_bayes', SimpleBayesStub()) 82 | 83 | def tearDown(self): 84 | registry.flush() 85 | SimpleBayesStub.Tokenizer = None 86 | SimpleBayesStub.Languages = {} 87 | SimpleBayesStub.data_string = None 88 | ZipFileStub.called = [] 89 | 90 | @mock.patch('simplebayes.SimpleBayes', SimpleBayesStub) 91 | @mock.patch('zipfile.ZipFile', ZipFileStub) 92 | def test_bootstrapSetsUpClassifierAsExpected(self): 93 | 94 | ProgrammingBayesianClassifier.bootstrap(TestConfig) 95 | 96 | self.assertEqual( 97 | ZipFileStub.called, 98 | [ 99 | 'init-trainers.zip-r', 100 | 'namelist', 101 | 'read-foo.def', 102 | 'read-bar.def' 103 | ] 104 | ) 105 | 106 | self.assertTrue( 107 | ismethod(SimpleBayesStub.Tokenizer) or 108 | isfunction(SimpleBayesStub.Tokenizer) 109 | ) 110 | self.assertIsInstance(registry.get('PP_bayes'), SimpleBayesStub) 111 | 112 | self.assertEqual( 113 | SimpleBayesStub.Languages, 114 | { 115 | 'foo': 'foo.def-text', 116 | 'bar': 'bar.def-text' 117 | } 118 | ) 119 | 120 | @mock.patch('simplebayes.SimpleBayes', SimpleBayesStub) 121 | def test_classifierProducesExpectedResult(self): 122 | 123 | ProgrammingBayesianClassifier.bootstrap(TestConfig) 124 | 125 | classifier = ProgrammingBayesianClassifier() 126 | result = classifier.classify('echo "Hello World";') 127 | 128 | self.assertEqual('echo "Hello World";', SimpleBayesStub.data_string) 129 | self.assertEqual('FooBar', result) 130 | 131 | def test_tokenizerProducesExpectedList(self): 132 | result = ProgrammingBayesianClassifier.bayes_tokenizer('Hello World') 133 | self.assertEqual(2, len(result)) 134 | -------------------------------------------------------------------------------- 
/tests/parsers/programming/lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.parsers.programming.lexer import \ 26 | ProgrammingLexerThread, \ 27 | ProgrammingLexer 28 | from pygments.lexers.web import PhpLexer 29 | import unittest 30 | 31 | 32 | class ProgrammingLexerThreadTests(unittest.TestCase): 33 | 34 | def test_phpLexerDetectsExpectedTokens(self): 35 | data_string = "echo 'Hello World';" 36 | lexer = ProgrammingLexerThread( 37 | 'php', 38 | PhpLexer(startinline=True), 39 | data_string 40 | ) 41 | 42 | lexer.start() 43 | lexer.join() 44 | 45 | self.assertEqual(3, lexer.result) 46 | 47 | def test_phpLexerDetectsExpectedNoTokens(self): 48 | data_string = " " 49 | lexer = ProgrammingLexerThread( 50 | 'php', 51 | PhpLexer(startinline=True), 52 | data_string 53 | ) 54 | 55 | lexer.start() 56 | lexer.join() 57 | 58 | self.assertFalse(lexer.result) 59 | 60 | 61 | class ProgrammingLexerTests(unittest.TestCase): 62 | 63 | def test_phpIsDetectedUsingProgrammingLexer(self): 64 | data_string = "echo 'Hello World';" 65 | lexer = ProgrammingLexer(['php'], data_string) 66 | result = lexer.lex() 67 | 68 | self.assertEqual({'php': 3}, result) 69 | -------------------------------------------------------------------------------- /tests/parsers/uri.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 
14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.parsers.uri import URIParser 26 | from tests.config import TestConfig 27 | import unittest 28 | 29 | 30 | class URIParserTests(unittest.TestCase): 31 | """Unit Testing of the URIParser""" 32 | 33 | up = None 34 | 35 | def setUp(self): 36 | self.up = URIParser(TestConfig()) 37 | 38 | def tearDown(self): 39 | self.up = None 40 | 41 | def test_is_ipv6_address(self): 42 | self.assertTrue(self.up.is_ipv6_address("2607:f0d0:1002:51::4")) 43 | self.assertFalse(self.up.is_ipv6_address("2607:f0z0:1002:51::4")) 44 | self.assertFalse(self.up.is_ipv6_address("Your mother was a hamster")) 45 | 46 | def test_is_ipv4_address(self): 47 | self.assertTrue(self.up.is_ipv4_address("192.168.15.1")) 48 | self.assertTrue(self.up.is_ipv4_address("8.8.8.8")) 49 | self.assertFalse(self.up.is_ipv4_address("1.800.123.4567")) 50 | self.assertFalse(self.up.is_ipv4_address("Your mother was a hamster")) 51 | 52 | def test_is_valid_url(self): 53 | self.assertTrue(self.up.is_valid_url("http://www.google.com/")) 54 | self.assertFalse(self.up.is_valid_url("http://www.go_ogle.com/")) 55 | self.assertFalse(self.up.is_valid_url("www.google.com/")) 56 | self.assertFalse(self.up.is_valid_url("Your mother was a hamster")) 57 | 58 | def test_parseWithLessThanFourCharactersYieldsNothing(self): 59 | count = 0 60 | for _ in self.up.parse('htt'): 61 | count += 1 62 
| self.assertEqual(count, 0) 63 | 64 | def test_parseIPV4AddressReturnsExpectedConfidence(self): 65 | count = 0 66 | for result in self.up.parse('192.168.0.1'): 67 | self.assertEqual(result.confidence, 95) 68 | self.assertEqual(result.subtype, 'IP Address (v4)') 69 | count += 1 70 | self.assertEqual(count, 1) 71 | 72 | def test_parseIPV6AddressReturnsExpectedConfidence(self): 73 | count = 0 74 | for result in self.up.parse('2607:f0d0:1002:51::4'): 75 | self.assertEqual(result.confidence, 100) 76 | self.assertEqual(result.subtype, 'IP Address (v6)') 77 | count += 1 78 | self.assertEqual(count, 1) 79 | 80 | def test_parseURLReturnsExpectedConfidence(self): 81 | count = 0 82 | for result in self.up.parse('http://www.google.com/'): 83 | self.assertEqual(result.confidence, 100) 84 | self.assertEqual(result.subtype, 'URL') 85 | count += 1 86 | self.assertEqual(count, 1) 87 | 88 | def test_parseNonHTTPURLReturnsExpectedConfidence(self): 89 | count = 0 90 | for result in self.up.parse('www.google.com/'): 91 | self.assertEqual(result.confidence, 75) 92 | self.assertEqual(result.subtype, 'URL') 93 | count += 1 94 | self.assertEqual(count, 1) 95 | 96 | def test_parseWithStrangePossibilityReturnsExpectedConfidence(self): 97 | count = 0 98 | for result in self.up.parse('23.5234 -56.7286'): 99 | self.assertEqual(result.confidence, 45) 100 | self.assertEqual(result.subtype, 'IP Address (v4)') 101 | count += 1 102 | self.assertEqual(count, 1) 103 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """ 3 | The MIT License (MIT) 4 | 5 | Copyright (c) Serenity Software, LLC 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | """ 25 | # pylint: disable=unused-wildcard-import,wildcard-import,unused-import 26 | import os 27 | import sys 28 | sys.path.append(os.path.dirname(os.path.realpath(__file__))[:-11]) 29 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) 30 | 31 | import cahoots # flake8: noqa 32 | import cahoots.confidence.normalizer # flake8: noqa 33 | import cahoots.confidence.normalizers.base # flake8: noqa 34 | import cahoots.confidence.normalizers.character # flake8: noqa 35 | import cahoots.confidence.normalizers.date # flake8: noqa 36 | import cahoots.confidence.normalizers.equation # flake8: noqa 37 | import cahoots.confidence.normalizers.number # flake8: noqa 38 | import cahoots.confidence.normalizers.phone # flake8: noqa 39 | import cahoots.config # flake8: noqa 40 | import cahoots.parsers.base # flake8: noqa 41 | import cahoots.parsers.boolean # flake8: noqa 42 | import cahoots.parsers.character # flake8: noqa 43 | import cahoots.parsers.date # flake8: noqa 44 | import cahoots.parsers.email # flake8: noqa 45 | import cahoots.parsers.equation # flake8: noqa 46 | import cahoots.parsers.location # flake8: noqa 47 | import 
cahoots.parsers.location.address # flake8: noqa 48 | import cahoots.parsers.location.coordinate # flake8: noqa 49 | import cahoots.parsers.location.landmark # flake8: noqa 50 | import cahoots.parsers.location.postalcode # flake8: noqa 51 | import cahoots.parsers.measurement # flake8: noqa 52 | import cahoots.parsers.name # flake8: noqa 53 | import cahoots.parsers.number # flake8: noqa 54 | import cahoots.parsers.phone # flake8: noqa 55 | import cahoots.parsers.programming # flake8: noqa 56 | import cahoots.parsers.programming.bayesian # flake8: noqa 57 | import cahoots.parsers.programming.lexer # flake8: noqa 58 | import cahoots.parsers.uri # flake8: noqa 59 | import cahoots.data # flake8: noqa 60 | import cahoots.parser # flake8: noqa 61 | import cahoots.result # flake8: noqa 62 | import cahoots.util # flake8: noqa 63 | 64 | from tests.confidence.normalizer import * # flake8: noqa 65 | from tests.confidence.normalizers.base import * # flake8: noqa 66 | from tests.confidence.normalizers.character import * # flake8: noqa 67 | from tests.confidence.normalizers.date import * # flake8: noqa 68 | from tests.confidence.normalizers.equation import * # flake8: noqa 69 | from tests.confidence.normalizers.number import * # flake8: noqa 70 | from tests.confidence.normalizers.phone import * # flake8: noqa 71 | from tests.parsers.base import * # flake8: noqa 72 | from tests.parsers.boolean import * # flake8: noqa 73 | from tests.parsers.character import * # flake8: noqa 74 | from tests.parsers.date import * # flake8: noqa 75 | from tests.parsers.email import * # flake8: noqa 76 | from tests.parsers.equation import * # flake8: noqa 77 | from tests.parsers.location import * # flake8: noqa 78 | from tests.parsers.location.address import * # flake8: noqa 79 | from tests.parsers.location.coordinate import * # flake8: noqa 80 | from tests.parsers.location.landmark import * # flake8: noqa 81 | from tests.parsers.location.postalcode import * # flake8: noqa 82 | from 
tests.parsers.measurement import * # flake8: noqa 83 | from tests.parsers.name import * # flake8: noqa 84 | from tests.parsers.number import * # flake8: noqa 85 | from tests.parsers.phone import * # flake8: noqa 86 | from tests.parsers.programming import * # flake8: noqa 87 | from tests.parsers.programming.bayesian import * # flake8: noqa 88 | from tests.parsers.programming.lexer import * # flake8: noqa 89 | from tests.parsers.uri import * # flake8: noqa 90 | from tests.data import * # flake8: noqa 91 | from tests.parser import * # flake8: noqa 92 | from tests.util import * # flake8: noqa 93 | -------------------------------------------------------------------------------- /tests/util.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) Serenity Software, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | # pylint: disable=invalid-name,too-many-public-methods,missing-docstring 25 | from cahoots.util import truncate_text, is_number, strings_intersect 26 | import unittest 27 | 28 | 29 | class TruncateTextTests(unittest.TestCase): 30 | 31 | testString = 'The quick brown fox jumps over the lazy dog' 32 | 33 | def test_too_short_string(self): 34 | self.assertEquals(truncate_text(self.testString), self.testString) 35 | 36 | def test_short_limit(self): 37 | self.assertEquals(truncate_text(self.testString, 10), 'The qui...') 38 | 39 | def test_too_long_string(self): 40 | testString = 'Lorem ipsum dolor sit amet, consectetur adipiscing' \ 41 | ' elit. Suspendisse non risus risus amet.' 42 | truncatedTestString = 'Lorem ipsum dolor sit amet, consectetur' \ 43 | ' adipiscing elit. Suspendisse non risu...' 44 | 45 | self.assertEquals(truncate_text(testString), truncatedTestString) 46 | 47 | 48 | class IsNumberTests(unittest.TestCase): 49 | 50 | def test_is_number(self): 51 | 52 | self.assertTrue(is_number("123.123")) 53 | self.assertTrue(is_number("123")) 54 | 55 | self.assertFalse(is_number("7 divided by 2")) 56 | 57 | 58 | class StringsIntersectTests(unittest.TestCase): 59 | 60 | def test_strings_intersect(self): 61 | 62 | self.assertFalse(strings_intersect("abc", "def")) 63 | 64 | self.assertTrue(strings_intersect("abc", "cde")) 65 | --------------------------------------------------------------------------------