├── .gitignore
├── MANIFEST.in
├── requirements.txt
├── setup.py
├── LICENSE
├── README.rst
└── spotlight
    ├── tests.py
    └── __init__.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst LICENSE
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Testing also requires nose>=1.2.1
2 | requests==1.2.3
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | from setuptools import setup
 4 | from setuptools import find_packages
 5 | 
 6 | 
 7 | classifiers = [
 8 |     "Intended Audience :: Developers",
 9 |     "Programming Language :: Python",
10 |     "Operating System :: OS Independent",
11 |     "Topic :: Software Development :: Libraries",
12 |     "Environment :: Web Environment",
13 |     "License :: OSI Approved :: BSD License",
14 |     "Development Status :: 5 - Production/Stable",
15 | ]
16 | 
17 | requires = ["requests==1.2.3", ]
18 | 
19 | # This might not be the best idea.
20 | try:
21 |     import json
22 | except ImportError:
23 |     requires.append('simplejson>=2.0')
24 | 
25 | 
26 | # Python 2.6 does not ship with an OrderedDict implementation.
27 | # God save the cheeseshop!
28 | try:
29 |     from collections import OrderedDict
30 | except ImportError:
31 |     requires.append('ordereddict>=1.1')
32 | 
33 | 
34 | setup(name='pyspotlight',
35 |       version='0.6.5.2',
36 |       license='BSD',
37 |       url='https://github.com/newsgrape/pyspotlight',
38 |       packages=find_packages(),
39 |       description='Python interface to the DBPedia Spotlight REST API',
40 |       long_description=open('README.rst').read(),
41 |       keywords="dbpedia spotlight semantic",
42 |       classifiers=classifiers,
43 |       install_requires=requires,
44 | )
45 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013, Luis Nell
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification,
 5 | are permitted provided that the following conditions are met:
 6 | 
 7 |   Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 |   Redistributions in binary form must reproduce the above copyright notice, this
11 |   list of conditions and the following disclaimer in the documentation and/or
12 |   other materials provided with the distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | Maintenance of this repository has stopped. You can find a new, maintained version at https://github.com/aolieman/pyspotlight
  4 | 
  5 | 
  6 | ===========
  7 | pyspotlight
  8 | ===========
  9 | 
 10 | is a thin python wrapper around `DBpedia Spotlight`_'s `REST Interface`_.
 11 | 
 12 | The tested DBpedia Spotlight versions are 0.5 and 0.6.5, though it seems to also work with 0.7 as confirmed by some users.
 13 | As long as there are no major API overhauls, this wrapper might also
 14 | work with future versions. If you encounter a bug with a newer DBpedia version,
 15 | feel free to create an issue here on github.
 16 | 
 17 | Note that we're trying to track DBpedia Spotlight release version numbers, so you can
 18 | easily see which pyspotlight version has been tested with which Spotlight
 19 | release. Therefore all pyspotlight 0.5 releases are tested against
 20 | Spotlight 0.5 etc.
 21 | 
 22 | .. _`DBpedia Spotlight`: https://github.com/dbpedia-spotlight/dbpedia-spotlight#dbpedia-spotlight
 23 | .. _`REST Interface`: https://github.com/dbpedia-spotlight/dbpedia-spotlight/wiki/Web-service
 24 | 
 25 | Installation
 26 | ============
 27 | 
 28 | The newest stable release can be found on the `Python Package Index (PyPI) <https://pypi.python.org/pypi>`__.
 29 | 
 30 | Therefore installation is as easy as::
 31 | 
 32 |     pip install pyspotlight
 33 | 
 34 | Requirements for installation from source/github
 35 | ================================================
 36 | 
 37 | This module has been tested with Python 2.6 and Python 2.7.
 38 | 
 39 | As long as you use the ``setup.py`` for the installation
 40 | (``python setup.py install``), you'll be fine because Python takes care of the
 41 | dependencies for you.
 42 | 
 43 | If you decide not to use the ``setup.py`` you will need the ``requests``
 44 | library. In case you are running a Python Version older than 2.7, you will
 45 | also need to install the ``ordereddict`` module.
 46 | 
 47 | All of these packages can be found on the `Python PackageIndex`_ and easily
 48 | installed via either ``easy_install`` or, `the recommended`_, ``pip``.
 49 | 
 50 | Using ``pip`` it is especially easy because you can just do this::
 51 | 
 52 |     pip install -r requirements.txt
 53 | 
 54 | and it will install all packages from that file.
 55 | 
 56 | .. _`Python PackageIndex`: http://pypi.python.org/
 57 | .. _`the recommended`: http://stackoverflow.com/questions/3220404/why-use-pip-over-easy-install
 58 | 
 59 | Usage
 60 | =====
 61 | 
 62 | If you just want to play around with spotlight, there is a running version
 63 | available under ``http://spotlight.sztaki.hu:LANG_PORT/rest/annotate``, where ``LANG_PORT`` is one of the following depending on the language you want to annotate (thx to @robert-boulanger in Issue #10)::
 64 |     
 65 |     LANG_PORTS = {
 66 |         "english": '2222', 
 67 |         "german": '2226', 
 68 |         "dutch": '2232', 
 69 |         "hungarian": '2229', 
 70 |         "french": '2225', 
 71 |         "portuguese": '2228', 
 72 |         "italian": '2230', 
 73 |         "russian": '2227', 
 74 |         "turkish": '2235', 
 75 |         "spanish": '2231'
 76 |     }
 77 | 
 78 | (Also the public server doesn't like the ``LingPipeSpotter``, which is used by *pyspotlight* by default. To work around this, simply pass ``spotter='Default'`` to the ``annotate()`` call)
 79 | 
 80 | Usage is simple and easy, just as is the API::
 81 | 
 82 |     >>> import spotlight
 83 |     >>> annotations = spotlight.annotate('http://localhost/rest/annotate',
 84 |     ...                                  'Your test text',
 85 |     ...                                  confidence=0.4, support=20)
 86 | 
 87 | This should return a list of all resources found within the given text.
 88 | Assuming we did this for the following text::
 89 | 
 90 |     President Obama on Monday will call for a new minimum tax rate for individuals making more than $1 million a year to ensure that they pay at least the same percentage of their earnings as other taxpayers, according to administration officials.
 91 | 
 92 | We might get this back::
 93 | 
 94 |     >>> annotations
 95 |     [{u'URI': u'http://dbpedia.org/resource/Presidency_of_Barack_Obama',
 96 |       u'offset': 0,
 97 |       u'percentageOfSecondRank': -1.0,
 98 |       u'similarityScore': 0.10031112283468246,
 99 |       u'support': 134,
100 |       u'surfaceForm': u'President Obama',
101 |       u'types': u'DBpedia:OfficeHolder,DBpedia:Person,Schema:Person,Freebase:/book/book_subject,Freebase:/book,Freebase:/book/periodical_subject,Freebase:/media_common/quotation_subject,Freebase:/media_common'},…(truncated remaining elements)…]
102 | 
103 | The same parameters apply to the ``spotlight.candidates`` function.
104 | 
105 | The following exceptions can occur:
106 | 
107 | * ``ValueError`` when:
108 | 
109 |   - the JSON response could not be decoded.
110 | 
111 | * ``SpotlightException`` when:
112 | 
113 |   - the JSON response did not contain any needed fields or was not formed as
114 |     expected.
115 |   - You forgot to explicitly specify a protocol (http/https) in the API URL.
116 | 
117 |   Usually the exception's message is telling you *exactly* what is wrong. If
118 |   not, I might have forgotten some error handling. So just open up an issue on
119 |   github.
120 | 
121 | * ``requests.exceptions.HTTPError``
122 | 
123 |   Is thrown when the response http status code was *not* ``200``. This could happen
124 |   if you have a load balancer like nginx in front of your spotlight cluster and
125 |   there is not a single server available, so nginx throws a ``502 Bad Gateway``.
126 | 
127 | 
128 | Note that the API also supports a ``disambiguate`` interface, however I wasn't
129 | able to get it running. Therefore there is *no* ``disambiguate`` function
130 | available. Feel free to contribute :-)!
131 | 
132 | Tips
133 | ====
134 | 
135 | I'd highly recommend playing around with the *confidence* and *support* values.
136 | Furthermore it might be preferable to filter out more annotations by looking
137 | at their *similarityScore* (read: contextual score).
138 | 
139 | If you want to change the default values, feel free to use ``functools.partial``
140 | to create a little wrapper with simplified signature::
141 | 
142 |     >>> from spotlight import annotate
143 |     >>> from functools import partial
144 |     >>> api = partial(annotate, 'http://localhost/rest/annotate',
145 |     ...               confidence=0.4, support=20,
146 |     ...               spotter='AtLeastOneNounSelector')
147 |     >>> api('This is your test text. This function has other confidence, '
148 |     ...     'support and uses another spotter. Furthermore all calls go '
149 |     ...     'directly to localhost/rest/annotate.')
150 | 
151 | As you can see this reduces the function's complexity greatly.
152 | I did not feel the need to create fancy classes, they would've just led to
153 | more complexity.
154 | 
155 | Tests
156 | =====
157 | 
158 | If you want to run the tests, you will have to install ``nose`` (1.2.1) from the
159 | package index. Then you can simply run ``nosetests`` from the command line in
160 | this or the ``spotlight/`` directory.
161 | 
162 | Bugs
163 | ====
164 | 
165 | In case you spot a bug, please open an issue and attach the raw response you
166 | received. Have a look at `Issue #3`_ for a great example on how to file a bug report.
167 | 
168 | .. _`Issue #3`: https://github.com/newsgrape/pyspotlight/issues/3
169 | 


--------------------------------------------------------------------------------
/spotlight/tests.py:
--------------------------------------------------------------------------------
  1 | SKIP_ORDERED_DICT_TESTS = False
  2 | try:
  3 |     from collections import OrderedDict
  4 | except ImportError:
  5 |     SKIP_ORDERED_DICT_TESTS = True
  6 |     import sys
  7 |     sys.stderr.write('Skipping _dict_cleanup due to OrderedDict not being '
  8 |                      'available.\n')
  9 | 
 10 | from collections import namedtuple
 11 | from nose.tools import eq_, nottest, raises
 12 | 
 13 | import spotlight
 14 | 
 15 | 
 16 | @nottest
 17 | def fake_request_post(self, *args, **kwargs):
 18 |     RawResponse = namedtuple('RawResponse', ['reason', ])
 19 |     hear_me_RawR = RawResponse(reason='Just a fake reason.')
 20 | 
 21 |     class FakeResponse(spotlight.requests.models.Response):
 22 |         content = kwargs['headers']['fake_response']
 23 | 
 24 |         def raise_for_status(self):
 25 |             self.raw = hear_me_RawR
 26 |             self.status_code = (kwargs['headers']['fake_status']
 27 |                                 if 'fake_status' in kwargs['headers']
 28 |                                 else spotlight.requests.codes.ok)
 29 |             return super(FakeResponse, self).raise_for_status()
 30 |     return FakeResponse()
 31 | spotlight.requests.post = fake_request_post
 32 | 
 33 | 
 34 | def test_number_convert():
 35 |     eq_(spotlight._convert_number('0'), 0)
 36 |     eq_(spotlight._convert_number('0.2'), 0.2)
 37 |     eq_(spotlight._convert_number(True), True)
 38 |     eq_(spotlight._convert_number('evi'), 'evi')
 39 |     # Testing the footnote workaround.
 40 |     eq_(spotlight._convert_number([1]), '[1]')
 41 | 
 42 | 
 43 | @raises(spotlight.SpotlightException)
 44 | def test_protocol_missing():
 45 |     spotlight.annotate('localhost', 'asdasdasd',
 46 |                        headers={'fake_response': 'invalid json',
 47 |                                 'fake_status': 502})
 48 | 
 49 | 
 50 | @raises(spotlight.requests.exceptions.HTTPError)
 51 | def test_http_fail():
 52 |     spotlight.annotate('http://localhost', 'asdasdasd',
 53 |                        headers={'fake_response': 'invalid json',
 54 |                                 'fake_status': 502})
 55 | 
 56 | 
 57 | @raises(ValueError)
 58 | def test_annotation_invalid_json():
 59 |     spotlight.annotate('http://localhost', 'asdasdasd',
 60 |                        headers={'fake_response': 'invalid json'})
 61 | 
 62 | 
 63 | @raises(spotlight.SpotlightException)
 64 | def test_missing_resources():
 65 |     spotlight.annotate('http://localhost', 'asdasdasd',
 66 |             headers={'fake_response': '{"Test": "Win"}'})
 67 | 
 68 | 
 69 | @raises(ValueError)
 70 | def test_candidates_invalid_json():
 71 |     spotlight.annotate('http://localhost', 'asdasdasd',
 72 |                        headers={'fake_response': 'invalid json'})
 73 | 
 74 | 
 75 | @raises(spotlight.SpotlightException)
 76 | def test_missing_annotation():
 77 |     spotlight.candidates('http://localhost', 'asdasdasd',
 78 |             headers={'fake_response': '{"Test": "Win"}'})
 79 | 
 80 | 
 81 | @raises(spotlight.SpotlightException)
 82 | def test_missing_surfaceForms():
 83 |     spotlight.candidates('http://localhost', 'asdasdasd',
 84 |             headers={'fake_response': '{"annotation": {"Test": "Win"}}'})
 85 | 
 86 | 
 87 | def test_single_candidate():
 88 |     # Test with a single returned candidate, as was reported by issue #3.
 89 |     # Thanks to aolieman for the awesome test data!
 90 |     data = """
 91 | {
 92 |    "annotation":{
 93 |       "@text":"Industrial Design at the Technische Universiteit Delft",
 94 |       "surfaceForm":{
 95 |          "@name":"Technische Universiteit Delft",
 96 |          "@offset":"25",
 97 |          "resource":[
 98 |             {
 99 |                "@label":"Technische Universiteit Delft",
100 |                "@uri":"Technische_Universiteit_Delft",
101 |                "@contextualScore":"0.9991813164782087",
102 |                "@percentageOfSecondRank":"0.1422872887244497",
103 |                "@support":"3",
104 |                "@priorScore":"2.8799662606192636E-8",
105 |                "@finalScore":"0.8754365122251001",
106 |                "@types":""
107 |             },
108 |             {
109 |                "@label":"Delft University of Technology",
110 |                "@uri":"Delft_University_of_Technology",
111 |                "@contextualScore":"8.186418452925803E-4",
112 |                "@percentageOfSecondRank":"0.0",
113 |                "@support":"521",
114 |                "@priorScore":"5.001541405942121E-6",
115 |                "@finalScore":"0.12456348777489806",
116 |                "@types":"DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University"
117 |             }
118 |          ]
119 |       }
120 |    }
121 | }
122 |     """
123 |     candidates = spotlight.candidates('http://localhost', 'asdasdasd',
124 |                                       headers={'fake_response': data})
125 |     expected_out = [
126 |         {u'resource':
127 |             [
128 |                 {
129 |                     u'finalScore': 0.8754365122251001,
130 |                     u'support': 3,
131 |                     u'uri': u'Technische_Universiteit_Delft',
132 |                     u'label': u'Technische Universiteit Delft',
133 |                     u'types': u'',
134 |                     u'percentageOfSecondRank': 0.1422872887244497,
135 |                     u'priorScore': 2.8799662606192636e-08,
136 |                     u'contextualScore': 0.9991813164782087
137 |                 },
138 |                 {
139 |                     u'finalScore': 0.12456348777489806,
140 |                     u'support': 521,
141 |                     u'uri': u'Delft_University_of_Technology',
142 |                     u'label': u'Delft University of Technology',
143 |                     u'types': u'DBpedia:Agent, Schema:Organization, DBpedia:Organisation, Schema:EducationalOrganization, DBpedia:EducationalInstitution, Schema:CollegeOrUniversity, DBpedia:University',
144 |                     u'percentageOfSecondRank': 0.0,
145 |                     u'priorScore': 5.001541405942121e-06,
146 |                     u'contextualScore': 0.0008186418452925803
147 |                 },
148 |              ],
149 |          u'name': u'Technische Universiteit Delft',
150 |          u'offset': 25
151 |         }
152 |     ]
153 |     eq_(candidates, expected_out)
154 | 
155 | 
if not SKIP_ORDERED_DICT_TESTS:
    def test_dict_key_cleanup():
        """
        ``_dict_cleanup`` strips ``@`` from keys at every nesting level,
        drops ``None`` values and converts numeric strings.
        """
        # Input: "@"-prefixed keys, a None value and a nested list of dicts.
        dirty_nested = OrderedDict()
        dirty_nested['tests'] = '1'
        dirty_nested['stuff'] = [
            OrderedDict([('something', 'isgoingon')]),
            OrderedDict([('@moar', True)]),
        ]
        dirty_dict = OrderedDict()
        dirty_dict['@dirty'] = 'value'
        dirty_dict['@empty'] = None  # None values should be removed.
        dirty_dict['@recursive'] = dirty_nested

        # Expected output: same shape, clean keys, '1' turned into 1.
        clean_nested = OrderedDict()
        clean_nested['tests'] = 1
        clean_nested['stuff'] = [
            OrderedDict([('something', 'isgoingon')]),
            OrderedDict([('moar', True)]),
        ]
        clean_dict = OrderedDict()
        clean_dict['dirty'] = 'value'
        clean_dict['recursive'] = clean_nested

        cleaned = spotlight._dict_cleanup(dirty_dict, dict_type=OrderedDict)
        eq_(cleaned, clean_dict)
184 | 


--------------------------------------------------------------------------------
/spotlight/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Python DBpedia Spotlight API Wrapper
  3 | ====================================
  4 | 
  5 | This is just a simple interface to a Spotlight API.
  6 | 
  7 | Tested with DBPedia Spotlight 0.5 and 0.6.5.
  8 | 
  9 | Note that I'm trying to track Spotlight release version numbers, so you can
 10 | easily see which pyspotlight version has been tested with which Spotlight
 11 | release.
 12 | 
 13 | I hope the code and the small documentation speaks for itself :-)
 14 | 
 15 | If you should encounter any problems, feel free to contact me on github
 16 | (originell). I'm happy to help out with anything related to my code.
 17 | """
 18 | __version_info__ = (0, 6, 5)
 19 | __version__ = '.'.join(map(str, __version_info__))
 20 | 
 21 | 
 22 | import requests
 23 | 
 24 | 
 25 | class SpotlightException(Exception):
 26 |     """
 27 |     Exception raised on Spotlight failures.
 28 | 
 29 |     Basically this exception is raised if there was no valid JSON response
 30 |     from Spotlight.
 31 |     """
 32 |     pass
 33 | 
 34 | 
 35 | # Some helper functions.
 36 | def _convert_number(value):
 37 |     """
 38 |     Try to convert a string to an int or float.
 39 |     """
 40 |     if isinstance(value, bool):
 41 |         return value
 42 |     # Workaround for footnotes being put into Resources.surfaceForm and then
 43 |     # having them parsed by the JSON parser into a list. (issue #4)
 44 |     if isinstance(value, list):
 45 |         value = unicode(value)
 46 | 
 47 |     try:
 48 |         return int(value)
 49 |     except ValueError:
 50 |         try:
 51 |             return float(value)
 52 |         except ValueError:
 53 |             return value
 54 | 
 55 | 
 56 | def _dict_cleanup(dic, dict_type=dict):
 57 |     """
 58 |     Clean the response dictionary from ugly @ signs in keys.
 59 | 
 60 |     TODO: Make this an iteration based recursion instead of function based.
 61 |           That way we can avoid stack fails.
 62 |     """
 63 |     clean = dict_type()
 64 |     for key, value in dic.iteritems():
 65 |         if value is None:
 66 |             continue
 67 | 
 68 |         key = key.replace('@', '')
 69 |         try:
 70 |             try:
 71 |                 # If this is a string or bool,
 72 |                 # go straight to type conversion.
 73 |                 if (isinstance(value, basestring) or
 74 |                         isinstance(value, bool)):
 75 |                     raise AttributeError
 76 |                 # Test for an iterable (list, tuple, set)
 77 |                 value[0]
 78 |                 # Clean up each element in the iterable
 79 |                 clean[key] = [_dict_cleanup(element, dict_type)
 80 |                                 for element in value]
 81 |             except KeyError:
 82 |                 clean[key] = _dict_cleanup(value, dict_type)
 83 |         except AttributeError:
 84 |             clean[key] = _convert_number(value)
 85 |     return clean
 86 | 
 87 | 
 88 | # Main functions.
 89 | #
 90 | # I was inspired to go back to a function based approach after seeing this
 91 | # awesome talk by Jack Diederich: Stop Writing Classes
 92 | # http://pyvideo.org/video/880/stop-writing-classes
 93 | # Most of the class-based approach had the problems he described.
 94 | # Embarrassing!
 95 | def annotate(address, text, confidence=0.0, support=0,
 96 |              spotter='LingPipeSpotter', disambiguator='Default',
 97 |              policy='whitelist', headers=None):
 98 |     """
 99 |     Annotate a text.
100 | 
101 |     Can raise :exc:`requests.exceptions.HTTPError` or
102 |     :exc:`SpotlightException`, depending on where the failure is (HTTP status
103 |     code not 200 or the response not containing valid json).
104 | 
105 |     :param address:
106 |         The absolute address of the annotate REST API.
107 |     :type address: string
108 | 
109 |     :param text:
110 |         The text to be sent.
111 |     :type text: string
112 | 
113 |     :param confidence:
114 |         Filter out annotations below a given confidence.
115 |         Based on my experience I would suggest you set this to something
116 |         above 0.4, however your experience might vary from text to text.
117 |     :type confidence: float
118 | 
119 |     :param support:
120 |         Only output annotations above a given prominence (support).
121 |         Based on my experience I would suggest you set this to something
122 |         above 20, however your experience might vary from text to text.
123 |     :type support: int
124 | 
125 |     :param spotter:
126 |         One of spotters available on your DBPedia Spotlight server.
127 |         For example one of: LingPipeSpotter, AtLeastOneNounSelector,
128 |                             CoOccurrenceBasedSelector
129 |     :type spotter: string
130 | 
131 |     :param disambiguator:
132 |         The disambiguator to use on the annotation.
133 |     :type disambiguator: string
134 | 
135 |     :param policy:
136 |         The policy to be used.
137 |     :type disambiguator: string
138 | 
139 |     :param headers:
140 |         Additional headers to be set on the request.
141 |     :type headers: dictionary
142 | 
143 |     :rtype: list of resources
144 |     """
145 |     payload = {'confidence': confidence, 'support': support,
146 |                'spotter': spotter, 'disambiguator': disambiguator,
147 |                'policy': policy, 'text': text}
148 |     reqheaders = {'accept': 'application/json'}
149 |     reqheaders.update(headers or {})
150 | 
151 |     # Its better for the user to have to explicitly provide a protocl in the
152 |     # URL, since transmissions might happen over HTTPS or any other secure or
153 |     # faster (spdy :D) channel.
154 |     if not '://' in address:
155 |         raise SpotlightException('Oops. Looks like you forgot the protocol '
156 |                                  '(http/https) in your url (%s).' % address)
157 | 
158 |     response = requests.post(address, data=payload, headers=reqheaders)
159 |     if response.status_code != requests.codes.ok:
160 |         # Every http code besides 200 shall raise an exception.
161 |         response.raise_for_status()
162 | 
163 |     pydict = response.json()
164 |     if pydict is None:
165 |         raise SpotlightException("Spotlight's response did not contain valid "
166 |                                  "JSON: %s" % response.text)
167 | 
168 |     if not 'Resources' in pydict:
169 |         raise SpotlightException(
170 |                 'No Resources found in spotlight response: %s' % pydict)
171 | 
172 |     return [_dict_cleanup(resource) for resource in pydict['Resources']]
173 | 
174 | 
175 | # This is more or less a duplicate of the annotate function, with just
176 | # the return line being the difference haha.
def candidates(address, text, confidence=0.0, support=0,
               spotter='LingPipeSpotter', disambiguator='Default',
               policy='whitelist', headers=None):
    """
    Get the candidates from a text.

    Uses the same arguments as :meth:`annotate`.

    Can raise :exc:`requests.exceptions.HTTPError` (the server answered with
    an error status code), :exc:`ValueError` (the response body could not be
    decoded as JSON) or :exc:`SpotlightException` (the address has no
    protocol, or the decoded response is not usable).

    :rtype: list of surface forms
    """
    # Same explicit-protocol requirement as annotate().
    if '://' not in address:
        raise SpotlightException('Oops. Looks like you forgot the protocol '
                                 '(http/https) in your url (%s).' % address)

    payload = {'confidence': confidence, 'support': support,
               'spotter': spotter, 'disambiguator': disambiguator,
               'policy': policy, 'text': text}
    # Caller supplied headers win over the default accept header.
    reqheaders = {'accept': 'application/json'}
    reqheaders.update(headers or {})
    response = requests.post(address, data=payload, headers=reqheaders)
    if response.status_code != requests.codes.ok:
        # Let requests turn an error status code into an HTTPError.
        response.raise_for_status()

    pydict = response.json()
    # Anything but a JSON object (e.g. ``null``) cannot hold the annotation,
    # so report it instead of failing with a TypeError.
    if not isinstance(pydict, dict):
        raise SpotlightException("Spotlight's response did not contain valid "
                                 "JSON: %s" % response.text)
    if 'annotation' not in pydict:
        raise SpotlightException(
                'No annotations found in spotlight response: %s' % pydict)
    if 'surfaceForm' not in pydict['annotation']:
        raise SpotlightException(
                'No surface forms found in spotlight response: %s' % pydict)

    surface_forms = pydict['annotation']['surfaceForm']
    # Previously we assumed that the surfaceForm is *always* a list, however
    # depending on how many are returned, this does not have to be the case:
    # a lone surface form arrives as a plain dictionary.
    if isinstance(surface_forms, dict):
        # However note that we will *always* return a list.
        return [_dict_cleanup(surface_forms), ]
    return [_dict_cleanup(form) for form in surface_forms]
215 | 


--------------------------------------------------------------------------------