├── .gitignore ├── MANIFEST.in ├── requirements.txt ├── setup.py ├── LICENSE ├── README.rst └── spotlight ├── tests.py └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst LICENSE 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Testing also requires nose>=1.2.1 2 | requests==1.2.3 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | from setuptools import setup 4 | from setuptools import find_packages 5 | 6 | 7 | classifiers = [ 8 | "Intended Audience :: Developers", 9 | "Programming Language :: Python", 10 | "Operating System :: OS Independent", 11 | "Topic :: Software Development :: Libraries", 12 | "Environment :: Web Environment", 13 | "License :: OSI Approved :: BSD License", 14 | "Development Status :: 5 - Production/Stable", 15 | ] 16 | 17 | requires = ["requests==1.2.3", ] 18 | 19 | # This might not be the best idea. 20 | try: 21 | import json 22 | except ImportError: 23 | requires.append('simplejson>=2.0') 24 | 25 | 26 | # Python 2.6 does not ship with an OrderedDict implementation. 27 | # God save the cheeseshop! 
28 | try: 29 | from collections import OrderedDict 30 | except ImportError: 31 | requires.append('ordereddict>=1.1') 32 | 33 | 34 | setup(name='pyspotlight', 35 | version='0.6.5.2', 36 | license='BSD', 37 | url='https://github.com/newsgrape/pyspotlight', 38 | packages=find_packages(), 39 | description='Python interface to the DBPedia Spotlight REST API', 40 | long_description=open('README.rst').read(), 41 | keywords="dbpedia spotlight semantic", 42 | classifiers=classifiers, 43 | install_requires=requires, 44 | ) 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Luis Nell 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | Maintenance of this repository has stopped. You can find a new, maintained, version at https://github.com/aolieman/pyspotlight 4 | 5 | 6 | =========== 7 | pyspotlight 8 | =========== 9 | 10 | is a thin python wrapper around `DBpedia Spotlight`_'s `REST Interface`_. 11 | 12 | The tested DBpedia Spotlight versions are 0.5 and 0.6.5, though it seems to also work with 0.7 as confirmed by some users. 13 | As long as there are no major API overhauls, this wrapper might also 14 | work with future versions. If you encounter a bug with a newer DBpedia version, 15 | feel free to create an issue here on github. 16 | 17 | Note that we're trying to track DBpedia Spotlight release version numbers, so you can 18 | easily see which pyspotlight version has been tested with which Spotlight 19 | release. Therefore all pyspotlight 0.5 releases are tested against 20 | Spotlight 0.5 etc. 21 | 22 | .. _`DBpedia Spotlight`: https://github.com/dbpedia-spotlight/dbpedia-spotlight#dbpedia-spotlight 23 | .. _`REST Interface`: https://github.com/dbpedia-spotlight/dbpedia-spotlight/wiki/Web-service 24 | 25 | Installation 26 | ============ 27 | 28 | The newest stable release can be found on the `Python Package Index (PyPi) `__. 
29 | 30 | Therefore installation is as easy as:: 31 | 32 | pip install pyspotlight 33 | 34 | Requirements for installation from source/github 35 | ================================================ 36 | 37 | This module has been tested with Python 2.6 and Python 2.7. 38 | 39 | As long as you use the ``setup.py`` for the installation 40 | (``python setup.py install``), you'll be fine because Python takes care of the 41 | dependencies for you. 42 | 43 | If you decide not to use the ``setup.py`` you will need the ``requests`` 44 | library. In case you are running a Python Version older than 2.7, you will 45 | also need to install the ``ordereddict`` module. 46 | 47 | All of these packages can be found on the `Python PackageIndex`_ and easily 48 | installed via either ``easy_install`` or, `the recommended`_, ``pip``. 49 | 50 | Using ``pip`` it is especially easy because you can just do this:: 51 | 52 | pip install -r requirements.txt 53 | 54 | and it will install all packages from that file. 55 | 56 | .. _`Python PackageIndex`: http://pypi.python.org/ 57 | .. _`the recommended`: http://stackoverflow.com/questions/3220404/why-use-pip-over-easy-install 58 | 59 | Usage 60 | ===== 61 | 62 | if you just want to play around with spotlight, there is a running version 63 | available under ``http://spotlight.sztaki.hu:LANG_PORT/rest/annotate``, where ``LANG_PORT`` is one of the following depending on the language you want to annotate (thx to @robert-boulanger in Issue #10):: 64 | 65 | LANG_PORTS = { 66 | "english": '2222', 67 | "german": '2226', 68 | "dutch": '2232', 69 | "hungarian": '2229', 70 | "french": '2225', 71 | "portuguese": '2228', 72 | "italian": '2230', 73 | "russian": '2227', 74 | "turkish": '2235', 75 | "spanish": '2231' 76 | } 77 | 78 | (Also the public server doesn't like the ``LingPipeSpotter``, which is used by *pyspotlight* by default. 
To work around this, simply pass ``spotter='Default'`` to the ``annotate()`` call) 79 | 80 | Usage is simple and easy, just like the API:: 81 | 82 | >>> import spotlight 83 | >>> annotations = spotlight.annotate('http://localhost/rest/annotate', 84 | ... 'Your test text', 85 | ... confidence=0.4, support=20) 86 | 87 | This should return a list of all resources found within the given text. 88 | Assuming we did this for the following text:: 89 | 90 | President Obama on Monday will call for a new minimum tax rate for individuals making more than $1 million a year to ensure that they pay at least the same percentage of their earnings as other taxpayers, according to administration officials. 91 | 92 | We might get this back:: 93 | 94 | >>> annotations 95 | [{u'URI': u'http://dbpedia.org/resource/Presidency_of_Barack_Obama', 96 | u'offset': 0, 97 | u'percentageOfSecondRank': -1.0, 98 | u'similarityScore': 0.10031112283468246, 99 | u'support': 134, 100 | u'surfaceForm': u'President Obama', 101 | u'types': u'DBpedia:OfficeHolder,DBpedia:Person,Schema:Person,Freebase:/book/book_subject,Freebase:/book,Freebase:/book/periodical_subject,Freebase:/media_common/quotation_subject,Freebase:/media_common'},…(truncated remaining elements)…] 102 | 103 | The same parameters apply to the ``spotlight.candidates`` function. 104 | 105 | The following exceptions can occur: 106 | 107 | * ``ValueError`` when: 108 | 109 | - the JSON response could not be decoded. 110 | 111 | * ``SpotlightException`` when: 112 | 113 | - the JSON response did not contain any needed fields or was not formed as 114 | expected. 115 | - You forgot to explicitly specify a protocol (http/https) in the API URL. 116 | 117 | Usually the exception's message is telling you *exactly* what is wrong. If 118 | not, I might have forgotten some error handling. So just open up an issue on 119 | github. 120 | 121 | * ``requests.exceptions.HTTPError`` 122 | 123 | Is thrown when the response http status code was *not* ``200``. 
This could happen 124 | if you have a load balancer like nginx in front of your spotlight cluster and 125 | there is not a single server available, so nginx throws a ``502 Bad Gateway``. 126 | 127 | 128 | Note that the API also supports a ``disambiguate`` interface, however I wasn't 129 | able to get it running. Therefore there is *no* ``disambiguate`` function 130 | available. Feel free to contribute :-)! 131 | 132 | Tips 133 | ==== 134 | 135 | I'd highly recommend playing around with the *confidence* and *support* values. 136 | Furthermore it might be preferable to filter out more annotations by looking 137 | at their *similarityScore* (read: contextual score). 138 | 139 | If you want to change the default values, feel free to use ``functools.partial`` 140 | to create a little wrapper with simplified signature:: 141 | 142 | >>> from spotlight import annotate 143 | >>> from functools import partial 144 | >>> api = partial(annotate, 'http://localhost/rest/annotate', 145 | ... confidence=0.4, support=20, 146 | ... spotter='AtLeastOneNounSelector') 147 | >>> api('This is your test text. This function has other confidence, ' 148 | ... 'support and uses another spotter. Furthermore all calls go ' 149 | ... 'directly to localhost/rest/annotate.') 150 | 151 | As you can see this reduces the function's complexity greatly. 152 | I did not feel the need to create fancy classes, they would've just led to 153 | more complexity. 154 | 155 | Tests 156 | ===== 157 | 158 | If you want to run the tests, you will have to install ``nose`` (1.2.1) from the 159 | package index. Then you can simply run ``nosetests`` from the command line in 160 | this or the ``spotlight/`` directory. 161 | 162 | Bugs 163 | ==== 164 | 165 | In case you spot a bug, please open an issue and attach the raw response you 166 | received. Have a look at `Issue #3`_ for a great example on how to file a bug report. 167 | 168 | .. 
_`Issue #3`: https://github.com/newsgrape/pyspotlight/issues/3 169 | -------------------------------------------------------------------------------- /spotlight/tests.py: -------------------------------------------------------------------------------- 1 | SKIP_ORDERED_DICT_TESTS = False 2 | try: 3 | from collections import OrderedDict 4 | except ImportError: 5 | SKIP_ORDERED_DICT_TESTS = True 6 | import sys 7 | sys.stderr.write('Skipping _dict_cleanup due to OrderedDict not being ' 8 | 'available.\n') 9 | 10 | from collections import namedtuple 11 | from nose.tools import eq_, nottest, raises 12 | 13 | import spotlight 14 | 15 | 16 | @nottest 17 | def fake_request_post(self, *args, **kwargs): 18 | RawResponse = namedtuple('RawResponse', ['reason', ]) 19 | hear_me_RawR = RawResponse(reason='Just a fake reason.') 20 | 21 | class FakeResponse(spotlight.requests.models.Response): 22 | content = kwargs['headers']['fake_response'] 23 | 24 | def raise_for_status(self): 25 | self.raw = hear_me_RawR 26 | self.status_code = (kwargs['headers']['fake_status'] 27 | if 'fake_status' in kwargs['headers'] 28 | else spotlight.requests.codes.ok) 29 | return super(FakeResponse, self).raise_for_status() 30 | return FakeResponse() 31 | spotlight.requests.post = fake_request_post 32 | 33 | 34 | def test_number_convert(): 35 | eq_(spotlight._convert_number('0'), 0) 36 | eq_(spotlight._convert_number('0.2'), 0.2) 37 | eq_(spotlight._convert_number(True), True) 38 | eq_(spotlight._convert_number('evi'), 'evi') 39 | # Testing the footnote workaround. 
40 | eq_(spotlight._convert_number([1]), '[1]') 41 | 42 | 43 | @raises(spotlight.SpotlightException) 44 | def test_protocol_missing(): 45 | spotlight.annotate('localhost', 'asdasdasd', 46 | headers={'fake_response': 'invalid json', 47 | 'fake_status': 502}) 48 | 49 | 50 | @raises(spotlight.requests.exceptions.HTTPError) 51 | def test_http_fail(): 52 | spotlight.annotate('http://localhost', 'asdasdasd', 53 | headers={'fake_response': 'invalid json', 54 | 'fake_status': 502}) 55 | 56 | 57 | @raises(ValueError) 58 | def test_annotation_invalid_json(): 59 | spotlight.annotate('http://localhost', 'asdasdasd', 60 | headers={'fake_response': 'invalid json'}) 61 | 62 | 63 | @raises(spotlight.SpotlightException) 64 | def test_missing_resources(): 65 | spotlight.annotate('http://localhost', 'asdasdasd', 66 | headers={'fake_response': '{"Test": "Win"}'}) 67 | 68 | 69 | @raises(ValueError) 70 | def test_candidates_invalid_json(): 71 | spotlight.annotate('http://localhost', 'asdasdasd', 72 | headers={'fake_response': 'invalid json'}) 73 | 74 | 75 | @raises(spotlight.SpotlightException) 76 | def test_missing_annotation(): 77 | spotlight.candidates('http://localhost', 'asdasdasd', 78 | headers={'fake_response': '{"Test": "Win"}'}) 79 | 80 | 81 | @raises(spotlight.SpotlightException) 82 | def test_missing_surfaceForms(): 83 | spotlight.candidates('http://localhost', 'asdasdasd', 84 | headers={'fake_response': '{"annotation": {"Test": "Win"}}'}) 85 | 86 | 87 | def test_single_candidate(): 88 | # Test with a single returned candidate, as was reported by issue #3. 89 | # Thanks to aolieman for the awesome test data! 
90 | data = """ 91 | { 92 | "annotation":{ 93 | "@text":"Industrial Design at the Technische Universiteit Delft", 94 | "surfaceForm":{ 95 | "@name":"Technische Universiteit Delft", 96 | "@offset":"25", 97 | "resource":[ 98 | { 99 | "@label":"Technische Universiteit Delft", 100 | "@uri":"Technische_Universiteit_Delft", 101 | "@contextualScore":"0.9991813164782087", 102 | "@percentageOfSecondRank":"0.1422872887244497", 103 | "@support":"3", 104 | "@priorScore":"2.8799662606192636E-8", 105 | "@finalScore":"0.8754365122251001", 106 | "@types":"" 107 | }, 108 | { 109 | "@label":"Delft University of Technology", 110 | "@uri":"Delft_University_of_Technology", 111 | "@contextualScore":"8.186418452925803E-4", 112 | "@percentageOfSecondRank":"0.0", 113 | "@support":"521", 114 | "@priorScore":"5.001541405942121E-6", 115 | "@finalScore":"0.12456348777489806", 116 | "@types":"DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University" 117 | } 118 | ] 119 | } 120 | } 121 | } 122 | """ 123 | candidates = spotlight.candidates('http://localhost', 'asdasdasd', 124 | headers={'fake_response': data}) 125 | expected_out = [ 126 | {u'resource': 127 | [ 128 | { 129 | u'finalScore': 0.8754365122251001, 130 | u'support': 3, 131 | u'uri': u'Technische_Universiteit_Delft', 132 | u'label': u'Technische Universiteit Delft', 133 | u'types': u'', 134 | u'percentageOfSecondRank': 0.1422872887244497, 135 | u'priorScore': 2.8799662606192636e-08, 136 | u'contextualScore': 0.9991813164782087 137 | }, 138 | { 139 | u'finalScore': 0.12456348777489806, 140 | u'support': 521, 141 | u'uri': u'Delft_University_of_Technology', 142 | u'label': u'Delft University of Technology', 143 | u'types': u'DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University', 144 | u'percentageOfSecondRank': 0.0, 
145 | u'priorScore': 5.001541405942121e-06, 146 | u'contextualScore': 0.0008186418452925803 147 | }, 148 | ], 149 | u'name': u'Technische Universiteit Delft', 150 | u'offset': 25 151 | } 152 | ] 153 | eq_(candidates, expected_out) 154 | 155 | 156 | if not SKIP_ORDERED_DICT_TESTS: 157 | def test_dict_key_cleanup(): 158 | dirty_dict = OrderedDict() 159 | dirty_dict['@dirty'] = 'value' 160 | dirty_dict['@empty'] = None # None values should be removed. 161 | dirty_dict['@recursive'] = OrderedDict() 162 | dirty_dict['@recursive']['tests'] = '1' 163 | dirty_dict['@recursive']['stuff'] = OrderedDict() 164 | more = OrderedDict() 165 | more['something'] = 'isgoingon' 166 | moremore = OrderedDict() 167 | moremore['@moar'] = True 168 | moar_iterable = [more, moremore] 169 | dirty_dict['@recursive']['stuff'] = moar_iterable 170 | 171 | clean_dict = OrderedDict() 172 | clean_dict['dirty'] = 'value' 173 | clean_dict['recursive'] = OrderedDict() 174 | clean_dict['recursive']['tests'] = 1 175 | clean_dict['recursive']['stuff'] = OrderedDict() 176 | more = OrderedDict() 177 | more['something'] = 'isgoingon' 178 | moremore = OrderedDict() 179 | moremore['moar'] = True 180 | moar_iterable = [more, moremore] 181 | clean_dict['recursive']['stuff'] = moar_iterable 182 | eq_(spotlight._dict_cleanup(dirty_dict, dict_type=OrderedDict), 183 | clean_dict) 184 | -------------------------------------------------------------------------------- /spotlight/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python DBpedia Spotlight API Wrapper 3 | ==================================== 4 | 5 | This is just a simple interface to a Spotlight API. 6 | 7 | Tested with DBPedia Spotlight 0.5 and 0.6.5. 8 | 9 | Note that I'm trying to track Spotlight release version numbers, so you can 10 | easily see which pyspotlight version has been tested with which Spotlight 11 | release. 
12 | 13 | I hope the code and the small documentation speaks for itself :-) 14 | 15 | If you should encounter any problems, feel free to contact me on github 16 | (originell). I'm happy to help out with anything related to my code. 17 | """ 18 | __version_info__ = (0, 6, 5) 19 | __version__ = '.'.join(map(str, __version_info__)) 20 | 21 | 22 | import requests 23 | 24 | 25 | class SpotlightException(Exception): 26 | """ 27 | Exception raised on Spotlight failures. 28 | 29 | Basically this exception is raised if there was no valid JSON response 30 | from Spotlight. 31 | """ 32 | pass 33 | 34 | 35 | # Some helper functions. 36 | def _convert_number(value): 37 | """ 38 | Try to convert a string to an int or float. 39 | """ 40 | if isinstance(value, bool): 41 | return value 42 | # Workaround for footnotes being put into Resources.surfaceForm and then 43 | # having them parsed by the JSON parser into a list. (issue #4) 44 | if isinstance(value, list): 45 | value = unicode(value) 46 | 47 | try: 48 | return int(value) 49 | except ValueError: 50 | try: 51 | return float(value) 52 | except ValueError: 53 | return value 54 | 55 | 56 | def _dict_cleanup(dic, dict_type=dict): 57 | """ 58 | Clean the response dictionary from ugly @ signs in keys. 59 | 60 | TODO: Make this an iteration based recursion instead of function based. 61 | That way we can avoid stack fails. 62 | """ 63 | clean = dict_type() 64 | for key, value in dic.iteritems(): 65 | if value is None: 66 | continue 67 | 68 | key = key.replace('@', '') 69 | try: 70 | try: 71 | # If this is a string or bool, 72 | # go straight to type conversion. 
73 | if (isinstance(value, basestring) or 74 | isinstance(value, bool)): 75 | raise AttributeError 76 | # Test for an iterable (list, tuple, set) 77 | value[0] 78 | # Clean up each element in the iterable 79 | clean[key] = [_dict_cleanup(element, dict_type) 80 | for element in value] 81 | except KeyError: 82 | clean[key] = _dict_cleanup(value, dict_type) 83 | except AttributeError: 84 | clean[key] = _convert_number(value) 85 | return clean 86 | 87 | 88 | # Main functions. 89 | # 90 | # I was inspired to go back to a function based approach after seeing this 91 | # awesome talk by Jack Diederich: Stop Writing Classes 92 | # http://pyvideo.org/video/880/stop-writing-classes 93 | # Most of the class-based approach had the problems he described. 94 | # Embarrassing! 95 | def annotate(address, text, confidence=0.0, support=0, 96 | spotter='LingPipeSpotter', disambiguator='Default', 97 | policy='whitelist', headers=None): 98 | """ 99 | Annotate a text. 100 | 101 | Can raise :exc:`requests.exceptions.HTTPError` or 102 | :exc:`SpotlightException`, depending on where the failure is (HTTP status 103 | code not 200 or the response not containing valid json). 104 | 105 | :param address: 106 | The absolute address of the annotate REST API. 107 | :type address: string 108 | 109 | :param text: 110 | The text to be sent. 111 | :type text: string 112 | 113 | :param confidence: 114 | Filter out annotations below a given confidence. 115 | Based on my experience I would suggest you set this to something 116 | above 0.4, however your experience might vary from text to text. 117 | :type confidence: float 118 | 119 | :param support: 120 | Only output annotations above a given prominence (support). 121 | Based on my experience I would suggest you set this to something 122 | above 20, however your experience might vary from text to text. 123 | :type support: int 124 | 125 | :param spotter: 126 | One of spotters available on your DBPedia Spotlight server. 
127 | For example one of: LingPipeSpotter, AtLeastOneNounSelector, 128 | CoOccurrenceBasedSelector 129 | :type spotter: string 130 | 131 | :param disambiguator: 132 | The disambiguator to use on the annotation. 133 | :type disambiguator: string 134 | 135 | :param policy: 136 | The policy to be used. 137 | :type disambiguator: string 138 | 139 | :param headers: 140 | Additional headers to be set on the request. 141 | :type headers: dictionary 142 | 143 | :rtype: list of resources 144 | """ 145 | payload = {'confidence': confidence, 'support': support, 146 | 'spotter': spotter, 'disambiguator': disambiguator, 147 | 'policy': policy, 'text': text} 148 | reqheaders = {'accept': 'application/json'} 149 | reqheaders.update(headers or {}) 150 | 151 | # Its better for the user to have to explicitly provide a protocl in the 152 | # URL, since transmissions might happen over HTTPS or any other secure or 153 | # faster (spdy :D) channel. 154 | if not '://' in address: 155 | raise SpotlightException('Oops. Looks like you forgot the protocol ' 156 | '(http/https) in your url (%s).' % address) 157 | 158 | response = requests.post(address, data=payload, headers=reqheaders) 159 | if response.status_code != requests.codes.ok: 160 | # Every http code besides 200 shall raise an exception. 161 | response.raise_for_status() 162 | 163 | pydict = response.json() 164 | if pydict is None: 165 | raise SpotlightException("Spotlight's response did not contain valid " 166 | "JSON: %s" % response.text) 167 | 168 | if not 'Resources' in pydict: 169 | raise SpotlightException( 170 | 'No Resources found in spotlight response: %s' % pydict) 171 | 172 | return [_dict_cleanup(resource) for resource in pydict['Resources']] 173 | 174 | 175 | # This is more or less a duplicate of the annotate function, with just 176 | # the return line being the difference haha. 
def candidates(address, text, confidence=0.0, support=0,
               spotter='LingPipeSpotter', disambiguator='Default',
               policy='whitelist', headers=None):
    """
    Get the candidates from a text.

    Uses the same arguments as :meth:`annotate`.

    Can raise :exc:`requests.exceptions.HTTPError` (HTTP status code not
    200), :exc:`ValueError` (the response body could not be decoded as JSON)
    or :exc:`SpotlightException` (missing protocol in ``address`` or a JSON
    response without the expected ``annotation``/``surfaceForm`` fields).

    :rtype: list of surface forms
    """
    # Same guard as in annotate(): the protocol has to be given explicitly,
    # so the user gets the documented SpotlightException instead of a
    # requests-internal error.
    if '://' not in address:
        raise SpotlightException('Oops. Looks like you forgot the protocol '
                                 '(http/https) in your url (%s).' % address)

    payload = {'confidence': confidence, 'support': support,
               'spotter': spotter, 'disambiguator': disambiguator,
               'policy': policy, 'text': text}
    reqheaders = {'accept': 'application/json'}
    reqheaders.update(headers or {})

    response = requests.post(address, data=payload, headers=reqheaders)
    if response.status_code != requests.codes.ok:
        # Every http code besides 200 shall raise an exception.
        response.raise_for_status()

    pydict = response.json()
    if pydict is None:
        raise SpotlightException("Spotlight's response did not contain valid "
                                 "JSON: %s" % response.text)

    # Valid JSON does not have to be an object, so check the type before
    # looking for the expected fields.
    if not isinstance(pydict, dict) or 'annotation' not in pydict:
        raise SpotlightException(
            'No annotations found in spotlight response: %s' % (pydict,))
    annotation = pydict['annotation']
    if not isinstance(annotation, dict) or 'surfaceForm' not in annotation:
        raise SpotlightException(
            'No surface forms found in spotlight response: %s' % (pydict,))

    # Previously we assumed that the surfaceForm is *always* a list, however
    # depending on how many are returned, this does not have to be the case:
    # a single surface form arrives as a plain object.
    surface_forms = annotation['surfaceForm']
    if isinstance(surface_forms, dict):
        # However note that we will *always* return a list.
        return [_dict_cleanup(surface_forms), ]
    # An empty list simply yields an empty result.
    return [_dict_cleanup(form) for form in surface_forms]