├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── api_json.py
├── api_old.py
├── convert_tei_json_to_simple_json
    ├── __init__.py
    ├── book_chapter.py
    ├── book_line.py
    └── convert_all_perseus_xml.py
├── gunicorn_start.sh
├── metadata
    ├── __init__.py
    ├── commentary
    │   └── __init__.py
    ├── criticism
    │   ├── __init__.py
    │   └── criticism.py
    ├── definition
    │   ├── _init_.py
    │   └── views.py
    ├── entities
    │   ├── __init__.py
    │   ├── dbpedia.py
    │   ├── entity.py
    │   ├── pleiades.py
    │   ├── viaf.py
    │   └── wikipedia.py
    ├── media
    │   └── __init__.py
    ├── pos
    │   ├── __init__.py
    │   ├── constants.py
    │   └── views.py
    ├── prosody
    │   ├── __init__.py
    │   ├── scansion.py
    │   └── scansion_to_html.py
    ├── stem
    │   ├── __init__.py
    │   └── views.py
    ├── text_reuse
    │   └── __init__.py
    ├── tokenize
    │   └── __init__.py
    ├── translations
    │   ├── __init__.py
    │   └── map_translation.py
    └── vector
    │   └── __init__.py
├── perseus_parsing_notes.txt
├── requirements.txt
├── tests.py
└── util
    ├── __init__.py
    ├── jsonp.py
    ├── numerals.py
    └── text.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *\~
 2 | venv
 3 | 
 4 | # Byte-compiled / optimized / DLL files
 5 | __pycache__/
 6 | *.py[cod]
 7 | 
 8 | # C extensions
 9 | *.so
10 | 
11 | # Distribution / packaging
12 | .Python
13 | env/
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | 
29 | # PyInstaller
30 | #  Usually these files are written by a python script from a template
31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 | 
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 | 
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *,cover
48 | 
49 | # Translations
50 | *.mo
51 | *.pot
52 | 
53 | # Django stuff:
54 | *.log
55 | 
56 | # Sphinx documentation
57 | docs/_build/
58 | 
59 | # PyBuilder
60 | target/
61 | 
62 | #########################
63 | # Emacs temporary files #
64 | #########################
65 | *~
66 | \#*\#
67 | /.emacs.desktop
68 | /.emacs.desktop.lock
69 | *.elc
70 | auto-save-list
71 | tramp
72 | .\#*
73 | 
74 | #################
75 | # Other Editors #
76 | #################
77 | *.sw[po]
78 | .idea/
79 | *.iml
80 | *.iws
81 | 
82 | ##########################
83 | # Temporary backup files #
84 | ##########################
85 | Backup of*.docx
86 | 
87 | ######################
88 | # OS generated files #
89 | ######################
90 | .DS_Store
91 | .DS_Store?
92 | ._*
93 | .Spotlight-V100
94 | .Trashes
95 | ehthumbs.db
96 | Thumbs.db
97 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: required
 2 | 
 3 | language: python
 4 | 
 5 | python:
 6 |   - "3.5"
 7 | 
 8 | before_script:
 9 |   - pip install --upgrade pip
10 |   - pip install -r requirements.txt
11 | 
12 | script:
13 |   # Notes on nose:
14 |   # Travis CI pre-installs `nose`
15 |   - nosetests
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Classical Language Toolkit
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Build Status](https://travis-ci.org/cltk/cltk_api.svg?branch=master)](https://travis-ci.org/cltk/cltk_api)
 2 | 
 3 | [![Join the chat at https://gitter.im/cltk/cltk_api](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/cltk/cltk_api?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 4 | 
 5 | # Notice
 6 | 
 7 | The Classics Archive application is currently under active development and is not ready for production.
 8 | 
 9 | # About
10 | 
11 | A simple Flask app for accessing corpora from the CLTK corpora.  Currently under development.
12 | 
13 | To run with gunicorn: `gunicorn -w 4 -b 0.0.0.0:5000 api_json:app`.
14 | 
15 | ## Development
16 | 
17 | To get started developing, you'll need Python3.5 and Mongo installed.
18 | 
19 | Create a virtual environment and activate it:
20 | 
21 | `$ pyvenv venv`
22 | `$ source venv/bin/activate`
23 | 
24 | Install dependencies:
25 | 
26 | `$ pip install -r requirements.txt`
27 | 
28 | Finally, start the app with the following command:
29 | 
30 | `$ python api_json.py`
31 | 


--------------------------------------------------------------------------------
/api_json.py:
--------------------------------------------------------------------------------
  1 | """Open JSON file and serve."""
  2 | 
  3 | import json
  4 | import os
  5 | 
  6 | from flask import Flask
  7 | from flask import request  # for getting query string
  8 | # eg: request.args.get('user') will get '?user=some-value'
  9 | from flask_restful import Resource, Api
 10 | from util.jsonp import jsonp
 11 | from metadata.pos.views import POSTagger
 12 | from metadata.stem.views import Stem
 13 | from metadata.definition.views import Definition
 14 | 
 15 | from flask_restful import reqparse
 16 | 
 17 | app = Flask(__name__)
 18 | api = Api(app)
 19 | 
 20 | 
 21 | # example
 22 | class HelloWorld(Resource):
 23 |     def get(self):
 24 |         return {'hello': 'world'}
 25 | 
 26 | 
 27 | # example
 28 | class TodoSimple(Resource):
 29 |     def get(self, todo_id):
 30 |         return {'example with token': todo_id}
 31 | 
 32 | 
 33 | def open_json(fp):
 34 |     """Open json file, return json."""
 35 |     with open(fp) as fo:
 36 |         return json.load(fo)
 37 | 
 38 | 
 39 | def get_cltk_text_dir(lang, corpus='perseus'):
 40 |     """Take relative filepath, return absolute"""
 41 |     cltk_home = os.path.expanduser('~/cltk_data')
 42 |     text_dir = os.path.join(cltk_home, lang.casefold(), 'text', lang.casefold() + '_text_' + corpus, 'json')
 43 |     return text_dir
 44 | 
 45 | def get_cltk_translation_dir(lang, translation_lang, corpus='perseus'):
 46 |     """Take relative filepath, return absolute"""
 47 |     cltk_home = os.path.expanduser('~/cltk_data')
 48 |     translation_dir = os.path.join(cltk_home, lang.casefold(), 'text', lang.casefold() + '_text_' + corpus, 'translation', translation_lang)
 49 |     return translation_dir
 50 | 
 51 | def get_cltk_commentary_dir(lang, corpus='perseus'):
 52 |     """Take relative filepath, return absolute"""
 53 |     cltk_home = os.path.expanduser('~/cltk_data')
 54 |     commentary_dir = os.path.join(cltk_home, lang.casefold(), 'text', lang.casefold() + '_text_' + corpus, 'commentary')
 55 |     return commentary_dir
 56 | 
 57 | class Text(Resource):
 58 | 
 59 |     def get(self, lang, corpus, author, work):
 60 | 
 61 |         parser = reqparse.RequestParser()
 62 |         parser.add_argument('translation')
 63 |         parser.add_argument('commentary')
 64 |         args = parser.parse_args()
 65 |         translation_lang = args.get('translation')
 66 |         commentary_author = args.get('commentary')
 67 | 
 68 |         if(commentary_author):
 69 |             _dir = get_cltk_commentary_dir(lang)
 70 |             file = author + "__" + work + ".json";
 71 |             json_fp = os.path.join(_dir, file);
 72 | 
 73 |             try:
 74 |                 file_dict = open_json(json_fp)
 75 |             except Exception as e:
 76 |                 return
 77 | 
 78 |             commentary = []
 79 |             if(commentary_author == "all"):
 80 |                 # Add all commentary
 81 |                 commentary = file_dict["commentary"]
 82 |             else:
 83 |                 # Add commentary by specific author
 84 |                 for item in file_dict["commentary"]:
 85 |                     print(item)
 86 |                     if item['author'] == commentary_author:
 87 |                         commentary.append(item)
 88 | 
 89 |             return {'language': lang,
 90 |                     'corpus': corpus,
 91 |                     'author': author,
 92 |                     'work': work,
 93 |                     'commentary': commentary,
 94 |                     'meta': file_dict['meta'],
 95 |                     }
 96 | 
 97 |         elif(translation_lang):
 98 |             # Assumes translation data file name as "author__work__language.json"
 99 |             _dir = get_cltk_translation_dir(lang, translation_lang)
100 |             file = author + "__" + work + ".json";
101 |             json_fp = os.path.join(_dir, file);
102 | 
103 |             try:
104 |                 file_dict = open_json(json_fp)
105 |             except Exception as e:
106 |                 return
107 |                 
108 |             return {'language': lang,
109 |                     'corpus': corpus,
110 |                     'author': author,
111 |                     'work': work,
112 |                     'translations': file_dict['translations'],
113 |                     'meta': file_dict['meta'],
114 |                     }
115 | 
116 |         else:
117 |             _dir = get_cltk_text_dir(lang)
118 |             file = author + "__" + work + ".json";
119 | 
120 |             json_fp = os.path.join(_dir, file)
121 | 
122 |             try:
123 |                 file_dict = open_json(json_fp)
124 |             except Exception as e:
125 |                 return
126 | 
127 |             text = file_dict['text']
128 | 
129 |             chunk1 = request.args.get('chunk1')
130 |             chunk2 = request.args.get('chunk2')
131 |             chunk3 = request.args.get('chunk3')
132 | 
133 |             if chunk1:
134 |                 text = text[chunk1]
135 | 
136 |             if chunk2:
137 |                 text = text[chunk2]
138 | 
139 |             if chunk3:
140 |                 text = text[chunk3]
141 | 
142 |             return {'language': lang,
143 |                     'corpus': corpus,
144 |                     'author': author,
145 |                     'work': work,
146 |                     'text': text,
147 |                     'meta': file_dict['meta'],
148 |                     }
149 | 
150 | 
151 | class Lang(Resource):
152 |     def get(self):
153 | 
154 |         cltk_home = os.path.expanduser('~/cltk_data')
155 |         dirs = os.listdir(cltk_home)
156 |         langs_with_perseus_corpus = []
157 |         for _dir_lang in dirs:
158 |             is_perseus_corpus = get_cltk_text_dir(_dir_lang)
159 |             if os.path.isdir(is_perseus_corpus):
160 |                 langs_with_perseus_corpus.append(_dir_lang)
161 | 
162 |         return {'languages': langs_with_perseus_corpus}
163 | 
164 | 
165 | class Corpus(Resource):
166 | 
167 |     def get(self, lang):
168 | 
169 |         possible_perseus_corpora_json = get_cltk_text_dir(lang)
170 |         possible_perseus_corpora = os.path.split(possible_perseus_corpora_json)[0]
171 |         is_perseus = os.path.isdir(possible_perseus_corpora)
172 |         corpora = []
173 |         if is_perseus and possible_perseus_corpora.endswith('_perseus'):
174 |             corpus_name = os.path.split(possible_perseus_corpora)[1]
175 |             corpora.append('perseus')
176 | 
177 |         return {'language': lang,
178 |                 'corpora': corpora}
179 | 
180 | class Author(Resource):
181 |     def get(self, lang, corpus):
182 | 
183 |         possible_perseus_corpora_json = get_cltk_text_dir(lang)
184 | 
185 |         authors = set()   # use set to avoid dupes
186 |         if os.path.isdir(possible_perseus_corpora_json):
187 |             files = os.listdir(possible_perseus_corpora_json)
188 |             for file in files:
189 |                 author = file.split('__')[0]
190 |                 authors.add(author)
191 |         else:
192 |             print('Corpus not installed into "~/cltk_data".')
193 | 
194 |         return {'language': lang,
195 |                 'authors': list(authors)}  # cast to list, set() not serializable
196 | 
197 | class Texts(Resource):
198 |     def get(self, lang, corpus, author):
199 |         home_dir = os.path.expanduser('~/cltk_data')
200 |         possible_corpus = os.path.join(home_dir, lang, 'text', lang + '_text_' + corpus, 'json')
201 |         dir_contents = os.listdir(possible_corpus)
202 | 
203 |         texts = []
204 |         for file in dir_contents:
205 |             if file.startswith(author):
206 |                 text = file.split('__')[1][:-5]
207 |                 texts.append(text)
208 | 
209 |         return {'language': lang,
210 |                 'corpus': corpus,
211 |                 'author': author,
212 |                 'texts': texts}
213 | 
214 | # http://localhost:5000/lang/latin/corpus/perseus/author/vergil/text
215 | # http://localhost:5000/lang/greek/corpus/perseus/author/homer/text
216 | api.add_resource(Texts, '/lang/<string:lang>/corpus/<string:corpus>/author/<string:author>/text')
217 | 
218 | # http://localhost:5000/lang/latin/corpus/perseus/author
219 | api.add_resource(Author, '/lang/<string:lang>/corpus/<string:corpus>/author')
220 | 
221 | # http://localhost:5000/lang/latin/corpus
222 | api.add_resource(Corpus, '/lang/<string:lang>/corpus')
223 | 
224 | 
225 | # http://localhost:5000/lang
226 | api.add_resource(Lang, '/lang')
227 | 
228 | 
229 | # http://localhost:5000/lang/greek/corpus/perseus/author/achilles_tatius/text/leucippe_et_clitophon?chunk1=1&chunk2=1&chunk3=1
230 | # http://localhost:5000/lang/greek/corpus/perseus/author/homer/text/odyssey
231 | # http://localhost:5000/lang/greek/corpus/perseus/author/homer/text/odyssey?chunk1=1&chunk2=1
232 | # http://localhost:5000/lang/greek/corpus/perseus/author/homer/text/odyssey?translation=english
233 | # http://localhost:5000/lang/greek/corpus/perseus/author/homer/text/odyssey?commentary=all
234 | # http://localhost:5000/lang/greek/corpus/perseus/author/homer/text/odyssey?commentary=E. T. Merril
235 | api.add_resource(Text, '/lang/<string:lang>/corpus/<string:corpus>/author/<string:author>/text/<string:work>')
236 | #api.add_resource(Text, '/lang/<string:lang>/corpus/<string:corpus>/author/<string:author>/text/<string:work>/<string:chunk1>')
237 | 
238 | # CLTK core pos
239 | api.add_resource(POSTagger, '/core/pos', endpoint='pos')
240 | 
241 | # CLTK core stemmer
242 | api.add_resource(Stem, '/core/stem/<string:sentence>')
243 | 
244 | # CLTK definitions 
245 | # http://localhost:5000/lang/latin/define/abante
246 | api.add_resource(Definition, '/lang/<string:lang>/define/<string:word>')
247 | 
248 | # simple examples
249 | api.add_resource(TodoSimple, '/todo/<string:todo_id>')
250 | api.add_resource(HelloWorld, '/hello')
251 | 
252 | if __name__ == '__main__':
253 |     #app.run(debug=True)
254 |     app.run(host='0.0.0.0', debug=True)
255 | 


--------------------------------------------------------------------------------
/api_old.py:
--------------------------------------------------------------------------------
  1 | """Main API file for backend CLTK webapp.
  2 | 
  3 | The Texts class parses files to get their metadata. This is super cludgy and needs to be redone somehow.
  4 | """
  5 | 
  6 | import os
  7 | from flask import Flask
  8 | from flask import request  # for getting query string
  9 | from flask import json, jsonify
 10 | # eg: request.args.get('user') will get '?user=some-value'
 11 | from flask_restful import Resource, Api
 12 | from flask.ext.pymongo import PyMongo
 13 | from ingest.resources import Ingest
 14 | from api.resources import Query
 15 | from util.jsonp import jsonp
 16 | 
 17 | app = Flask(__name__)
 18 | mongo = PyMongo(app)
 19 | api = Api(app)
 20 | 
 21 | 
 22 | class Authors(Resource):
 23 | 
 24 |     @jsonp
 25 |     def get(self, lang, corpus_name):
 26 |         # assert lang in ['greek', 'latin']
 27 |         text_path = os.path.expanduser('~/cltk_data/' + lang + '/text/' + lang + '_text_' + corpus_name)
 28 | 
 29 |         dir_contents = os.listdir(text_path)
 30 | 
 31 |         # Sulpicia dir has no Latin texts
 32 |         # Isocrates dir has no Greek texts
 33 |         remove_files = ['README.md', '.git', 'LICENSE.md', 'perseus_compiler.py', '.DS_Store', 'Sulpicia' , 'Isocrates']
 34 | 
 35 |         dir_contents = [f for f in dir_contents if f not in remove_files]
 36 | 
 37 |         return {'authors': sorted(dir_contents) }
 38 | 
 39 | 
 40 | class Texts(Resource):
 41 | 
 42 |     @jsonp
 43 |     def get(self, lang, corpus_name, author_name):
 44 |         text_path = os.path.expanduser(
 45 |             '~/cltk_data/' + lang + '/text/' + lang + '_text_' + corpus_name + '/' + author_name.casefold() + '/opensource')  # casefold() prob not nec
 46 |         dir_contents = os.listdir(text_path)
 47 |         ending = ''
 48 |         if corpus_name == 'perseus' and lang == 'greek':
 49 |             ending = '_gk.xml.json'
 50 |             if author_name.casefold() == 'aratus':
 51 |                 ending = '.xml.json'
 52 |             elif author_name.casefold() == 'jebborators':
 53 |                 ending = '.xml.json'
 54 |             elif author_name.casefold() == 'lucretius':
 55 |                 ending = '_lat.xml.json'
 56 |             elif author_name.casefold() == 'lycophron':
 57 |                 ending = '.xml.json'
 58 |             elif author_name.casefold() == 'nonnos':
 59 |                 ending = '.xml.json'
 60 |             elif author_name.casefold() == 'tryphiodorus':
 61 |                 ending = '.xml.json'
 62 |             elif author_name.casefold() == 'callimachus':
 63 |                 ending = '.xml.json'
 64 |         elif corpus_name == 'perseus' and lang == 'latin':
 65 |             ending = '_lat.xml.json'
 66 |             # weird exceptions
 67 |             if author_name.casefold() == 'histaugust':
 68 |                 ending = '.xml.json'
 69 |             elif author_name.casefold() == 'quintus':
 70 |                 ending = '.xml.json'
 71 |         dir_contents = [f for f in dir_contents if f.endswith(ending)]
 72 |         dir_contents = [f.casefold() for f in dir_contents]  # this probably isn't nec
 73 |         return json.dumps( {'texts': sorted(dir_contents)} )
 74 | 
 75 | 
 76 | class Text(Resource):
 77 | 
 78 |     @jsonp
 79 |     def get(self, lang, corpus_name, author_name, fname):
 80 | 
 81 |         text_path = os.path.expanduser(
 82 |             '~/cltk_data/') + lang + '/text/' + lang + '_text_' + corpus_name + '/' + author_name + '/opensource/' + fname
 83 |         ending = ''
 84 |         if corpus_name == 'perseus' and lang == 'greek':
 85 |             ending = '_gk.xml.json'
 86 |             if author_name.casefold() == 'aratus':
 87 |                 ending = '.xml.json'
 88 |             elif author_name.casefold() == 'jebborators':
 89 |                 ending = '.xml.json'
 90 |             elif author_name.casefold() == 'lucretius':
 91 |                 ending = '_lat.xml.json'
 92 |             elif author_name.casefold() == 'lycophron':
 93 |                 ending = '.xml.json'
 94 |             elif author_name.casefold() == 'nonnos':
 95 |                 ending = '.xml.json'
 96 |             elif author_name.casefold() == 'tryphiodorus':
 97 |                 ending = '.xml.json'
 98 |             elif author_name.casefold() == 'callimachus':
 99 |                 if fname.startswith('call_0'):
100 |                     ending = '.xml.json'
101 |         elif corpus_name == 'perseus' and lang == 'latin':
102 |             ending = '_lat.xml.json'
103 |             # weird exceptions
104 |             if author_name.casefold() == 'histaugust' or author_name.casefold() == 'quintus':
105 |                 ending = '.xml.json'
106 | 
107 |         text_path += ending
108 |         with open(text_path, "r") as f:  # TODO: use json.loads() for all this
109 |             file_string = f.read()
110 |         file_json = json.loads(file_string)
111 | 
112 |         # Some files are odd
113 |         if author_name.casefold() in ['quintus', 'aratus', 'callimachus', 'colluthus', 'lycophron', 'nonnos', 'tryphiodorus']:
114 |             encoding_desc = file_json['TEI.2']['teiHeader']['encodingDesc']
115 |             if type(encoding_desc) is list:
116 |                 for desc in encoding_desc:
117 |                     try:
118 |                         quintus = True
119 |                         refs_decls = desc.get('refsDecl')
120 |                         break
121 |                     except Exception:
122 |                         pass
123 |         # everyone else
124 |         else:
125 |             refs_decls = file_json['TEI.2']['teiHeader']['encodingDesc']['refsDecl']
126 | 
127 |         section_types = []  # list of lists
128 |         if type(refs_decls) is list:
129 |             for refs_decl in refs_decls:
130 |                 if refs_decl.get('@doctype') == 'TEI.2' and 'state' in refs_decl:
131 |                     states = refs_decl['state']
132 |                     if type(states) is list:
133 |                         units = []
134 |                         for state in states:
135 |                             unit = state['@unit']
136 |                             units.append(unit)
137 |                         section_types.append(units)
138 |                     elif type(states) is dict:
139 |                         state = states
140 |                         unit = state['@unit']
141 |                         section_types.append([unit])
142 |                 elif 'state' in refs_decl:
143 |                     states = refs_decl['state']
144 |                     if type(states) is list:
145 |                         units = []
146 |                         for state in states:
147 |                             unit = state['@unit']
148 |                             units.append(unit)
149 |                         section_types.append(units)
150 | 
151 |         elif type(refs_decls) is dict:
152 |             refs_decl = refs_decls
153 |             if refs_decl.get('@doctype') == 'TEI.2' and 'state' in refs_decl:
154 |                 states = refs_decl['state']
155 |                 if type(states) is list:
156 |                     units = []
157 |                     for state in states:
158 |                         unit = state['@unit']
159 |                         units.append(unit)
160 |                     section_types = [units]
161 |                 elif type(states) is dict:
162 |                     state = refs_decl['state']
163 |                     unit = state['@unit']
164 |                     section_types.append([unit])
165 |             elif refs_decl.get('@doctype') == 'TEI.2' and 'step' in refs_decl:
166 |                 steps = refs_decl['step']
167 |                 if type(steps) is list:
168 |                     units = []
169 |                     for state in steps:
170 |                         unit = state['@refunit']
171 |                         units.append(unit)
172 |                     section_types = [units]
173 |                 elif type(steps) is dict:
174 |                     step = refs_decl['step']
175 |                     unit = step['@refunit']
176 |                     section_types.append([unit])
177 |             elif refs_decl.get('@doctype') != 'TEI.2' and 'step' in refs_decl:
178 |                 print('*' * 40)
179 |                 steps = refs_decl['step']
180 |                 if type(steps) is list:
181 |                     units = []
182 |                     for state in steps:
183 |                         unit = state['@refunit']
184 |                         units.append(unit)
185 |                     section_types = [units]
186 |                 elif type(steps) is dict:
187 |                     step = refs_decl['step']
188 |                     unit = step['@refunit']
189 |                     section_types.append([unit])
190 | 
191 |             # Some entries missing `{'@doctype': 'TEI.2'}` (eg, Pliny's `pliny.min.letters`)
192 |             elif refs_decl.get('@doctype') != 'TEI.2' and 'state' in refs_decl:
193 |                 states = refs_decl['state']
194 |                 if type(states) is list:
195 |                     units = []
196 |                     for state in states:
197 |                         unit = state['@unit']
198 |                         units.append(unit)
199 |                     section_types = [units]
200 |                 elif type(states) is dict:
201 |                     state = refs_decl['state']
202 |                     unit = state['@unit']
203 |                     section_types.append([unit])
204 | 
205 | 
206 |         # Parse query strings
207 |         q_section_1 = request.args.get('section_1')
208 |         q_section_2 = request.args.get('section_2')
209 |         q_section_3 = request.args.get('section_3')
210 |         q_section_4 = request.args.get('section_4')
211 |         q_section_5 = request.args.get('section_5')
212 | 
213 |         # If no query string, return text object
214 |         if not q_section_1:
215 |             return {'refs_decl': refs_decls,
216 |                     'filepath': text_path,
217 |                     'section_types': section_types,
218 |                     'text': file_json['TEI.2']['text']
219 |                     }
220 | 
221 |         # Parse text according to query string
222 |         section_1_object = file_json['TEI.2']['text']['body']['div1']
223 | 
224 |         if type(section_1_object) is list:
225 |             for section_1 in section_1_object:
226 |                 try:
227 |                     section_1_number = section_1['@n']  # str
228 |                 except KeyError:
229 |                     # http://localhost:5000/lang/greek/corpus/perseus/author/Aeschylus/text/aesch.ag?section_1=1
230 |                     # Something funny. Redefine section_1 to s.th. deeper embedded
231 |                     #! This pathway is broke and I don't know if I want to make this more convoluted than it is. Dammit.
232 |                     section_1 = section_1['div2']['sp']
233 | 
234 |                 if section_1_number == q_section_1:
235 |                     section_1_object = section_1['l']  # list
236 | 
237 |                     # cleanup lines
238 |                     return_section_1_object = []
239 |                     for line in section_1_object:
240 |                         if type(line) is dict:
241 |                             line = line['#text']
242 |                         return_section_1_object.append(line)
243 | 
244 |                     if not q_section_2:
245 |                         # http://localhost:5000/lang/latin/corpus/perseus/author/Vergil/text/verg.a?section_1=12
246 |                         # http://localhost:5000/lang/greek/corpus/perseus/author/Homer/text/hom.od?section_1=1
247 |                         return {'refs_decl': refs_decls,
248 |                                 'filepath': text_path,
249 |                                 'section_types': section_types,
250 |                                 'text': return_section_1_object
251 |                                 }
252 | 
253 |                     for counter, section_2_item in enumerate(section_1_object):
254 |                         if type(section_2_item) is dict:
255 |                             section_2_item = section_2_item['#text']
256 |                         if counter + 1 == int(q_section_2):
257 |                             returned_text = section_2_item
258 | 
259 |             if not q_section_3:
260 |                 return {'refs_decl': refs_decls,
261 |                         'filepath': text_path,
262 |                         'section_types': section_types,
263 |                         'text': returned_text,
264 |                         }
265 | 
266 |         elif type(section_1_object) is dict:
267 |             # http://localhost:5000/lang/greek/corpus/perseus/author/Hesiod/text/hes.th?section_1=1
268 |             section_1_type = section_1_object['@type']
269 |             section_1_number = section_1_object['@n']
270 |             section_1_list = section_1_object['l']
271 | 
272 |             # cleanup lines
273 |             return_section_1_object = []
274 |             for line in section_1_list:
275 |                 if type(line) is dict:
276 |                     line = line['#text']
277 |                 return_section_1_object.append(line)
278 | 
279 |             for counter, section_1_item in enumerate(section_1_list):
280 |                 if type(section_1_item) is dict:
281 |                     section_1_item = section_1_item['#text']
282 |                 if counter + 1 == int(q_section_1):
283 |                     returned_text = section_1_item
284 | 
285 |             return {'refs_decl': refs_decls,
286 |                     'filepath': text_path,
287 |                     'section_types': section_types,
288 |                     'text': returned_text
289 |                     }
290 | 
291 | 
292 | # http://localhost:5000/lang/greek/corpus/perseus/authors
293 | api.add_resource(Authors, '/lang/<string:lang>/corpus/<string:corpus_name>/authors')
294 | 
295 | # http://localhost:5000/lang/greek/corpus/perseus/author/Homer/texts
296 | api.add_resource(Texts, '/lang/<string:lang>/corpus/<string:corpus_name>/author/<string:author_name>/texts')
297 | 
298 | # http://localhost:5000/lang/latin/corpus/perseus/author/Vergil/text/verg.a
299 | # http://localhost:5000/lang/greek/corpus/perseus/author/Homer/text/hom.od
300 | 
301 | # http://localhost:5000/lang/latin/corpus/perseus/author/Vergil/text/verg.a?section_1=1&section_2=1
302 | # http://localhost:5000/lang/greek/corpus/perseus/author/Homer/text/hom.od?section_1=1&section_2=1
303 | api.add_resource(Text,
304 |                  '/lang/<string:lang>/corpus/<string:corpus_name>/author/<string:author_name>/text/<string:fname>')
305 | 
306 | # Trigger new document ingest
307 | api.add_resource(Ingest, '/ingest')
308 | 
309 | # Feed GET params to query to DB
310 | api.add_resource(Query, '/query')
311 | 
312 | if __name__ == '__main__':
313 |     app.run(debug=True)
314 |     #app.run(host='0.0.0.0')
315 | 


--------------------------------------------------------------------------------
/convert_tei_json_to_simple_json/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/convert_tei_json_to_simple_json/__init__.py


--------------------------------------------------------------------------------
/convert_tei_json_to_simple_json/book_chapter.py:
--------------------------------------------------------------------------------
  1 | """Example: Ammianus"""
  2 | 
  3 | import json
  4 | import os
  5 | 
  6 | from book_line import file_to_dict
  7 | from book_line import dict_to_file
  8 | 
  9 | 
 10 | 
 11 | def book_chapter_convert(fp):
 12 |     """Take filepath, try to make new file.
 13 |     {'author': author_name,
 14 |       'text': [
 15 |         {'book': 1,
 16 |          'chapters':
 17 |            [{'chapter': 1, 'text': real_text}, …]
 18 |         }
 19 |       ]
 20 |     }
 21 |     """
 22 |     final_file_dict = {}
 23 |     books_list = []
 24 |     file_dict = file_to_dict(fp)
 25 | 
 26 |     tei = file_dict['TEI.2']  # dict
 27 |     text = tei['text']  # dict
 28 |     header = tei['teiHeader']  # dict
 29 | 
 30 |     # Get work's title, add to final dict
 31 |     title_list = header['fileDesc']['titleStmt']['title']
 32 |     for obj in title_list:
 33 |         if type(obj) is str:
 34 |             title_name = obj
 35 |             final_file_dict['title'] = title_name
 36 |             break
 37 |         try:
 38 |             if obj['@type'] == 'work':
 39 |                 title_name = obj['#text']
 40 |                 final_file_dict['title'] = title_name
 41 |                 break
 42 |         except KeyError:
 43 |             raise
 44 | 
 45 |     encoding = header['encodingDesc']  # dict
 46 |     body = text['body']  # dict
 47 |     div1 = body['div1']  # list of dict
 48 |     #print(len(div1))  # eg 12 for Aeneid, 24 for Iliad
 49 |     for div1_dict in div1:  # !Book loop
 50 |         #print(div1_dict.keys())
 51 |         book_dict = {}
 52 |         div1_dict_div2 = div1_dict['div2']  # list of dict; where the text at
 53 |         div1_dict_type = div1_dict['@type']   # book
 54 |         try:
 55 |             div1_dict_pb = div1_dict['pb']  # dict or list of dict: [{'@id': 'v2.p.16'}, {'@id': 'v2.p.30'}, {'@id': 'v2.p.68'}]
 56 |         except KeyError:
 57 |             div1_dict_pb = None
 58 |         div1_dict_head = div1_dict['head']  # str, eg: 'Liber XVII'
 59 |         div1_dict_number = div1_dict['@n']  # str, eg: 17, 'val1'
 60 |         book_number = div1_dict_number
 61 |         #print('Book:', book_number)
 62 |         book_dict['book'] = book_number
 63 | 
 64 |         chapters_list = []  # a list of {<chp_number>: <chptr text>}
 65 |         for div2 in div1_dict_div2:  # !Chapter Loop
 66 | 
 67 |             chapter_dict = {}
 68 |             chapter_text = []
 69 |             #print(type(div2))  # dict
 70 |             div2_type = div2['@type']  # str: chapter
 71 |             div2_number = div2['@n']  # 6, 12, 4
 72 |             chapter_number = div2_number
 73 |             #print('Chapter:', chapter_number)
 74 |             try:
 75 |                 div2_argument = div2['argument']  # dict: {'p': 'Quo patre natus sit, et quas res princeps gesserit.'} (text in here)
 76 |                 div2_text_section = div2_argument['p']  # ! Summary text here, not useful (I think)
 77 |                 #print('div2_text_section', div2_text_section)
 78 |                 if type(div2_text_section) is dict:
 79 |                     #print(div2_text_section.keys())  # ['note', '#text'] or ['corr', '#text']
 80 |                     div2_text_section_note = div2_text_section['note']  # summaries
 81 |                     div2_text_section_text = div2_text_section['text']  # empty
 82 |                     div2_text_section_corr = div2_text_section['corr']  # empty
 83 |                 elif type(div2_text_section) is str:
 84 |                     pass
 85 |                     #print(div2_text_section)  # ! real text here! (I think)
 86 |             except KeyError:
 87 |                 div2_argument = None
 88 |             div2_ps = div2['p']  # list of dicts or dict (text in here)
 89 |             if type(div2_ps) is dict:
 90 |                 #print(div2_ps.keys())  # ['note', 'quote', 'milestone', 'pb', '#text']
 91 |                 try:
 92 |                     div2_ps_note = div2_ps['note']  # [{'hi': {'#text': 'et ad molliora,', '@rend': 'italics'}, '#text': 'added in G; V omits.'}, … ]
 93 |                 except KeyError:
 94 |                     div2_ps_note = None
 95 |                 try:
 96 |                     div2_ps_quote = div2_ps['quote']  # ['Nemo', 'vereatur: habeo firmiter quod tenebam.', {'@rend': 'blockquote', 'l': [{'foreign': {'@lang': 'greek', '#text': 'Zeu\\s o(/tan ei)s platu\\ te/rma mo/lh| klutou= u(droxo/oio,'}}, … ]
 97 |                 except KeyError:
 98 |                     div2_ps_quote = None
 99 |                 div2_ps_milestone = div2_ps['milestone']
100 |                 try:
101 |                     div2_ps_pb = div2_ps['pb']  # [{'@id': 'v3.p.316'}, {'@id': 'v3.p.318'}] or {'@id': 'v2.p.190'}
102 |                 except KeyError:
103 |                     div2_ps_pb = None
104 |                 div2_ps_text = div2_ps['#text']  # ! actual text!
105 |                 real_text = div2_ps_text
106 |                 chapter_text.append(real_text)
107 |                 #print('div2_ps_text', div2_ps_text)
108 |             elif type(div2_ps) is list:
109 |                 for div2_ps_item in div2_ps:  # all dicts
110 |                     #print(div2_ps_item.keys())  # ['pb', 'milestone', 'note', '#text', 'quote']
111 |                     #div2_ps_item_pb = div2_ps_item['pb']
112 |                     #div2_ps_item_milestone = div2_ps_item['milestone']
113 |                     #div2_ps_item_note = div2_ps_item['note']
114 |                     try:
115 |                         div2_ps_item_text = div2_ps_item['#text']  # ! real text here
116 |                         real_text = div2_ps_item_text
117 |                         chapter_text.append(real_text)
118 |                         #print(div2_ps_item_text)
119 |                     except KeyError:
120 |                         div2_ps_item_text = None
121 |                     #div2_ps_item_quote = div2_ps_item['quote']
122 |             chapter_text_str = ' '.join(chapter_text)
123 |             chapter_dict[chapter_number] = chapter_text_str
124 |             chapters_list.append(chapter_dict)
125 |         book_dict['book'] = book_number
126 |         book_dict['chapters']= chapters_list
127 |         books_list.append(book_dict)
128 | 
129 |     # Get author name from 'latin_key.json'
130 |     key_fp = os.path.expanduser('~/cltk_data/latin/text/latin_text_perseus/latin_key.json')
131 |     with open(key_fp) as fo:
132 |         meta_authors = json.load(fo)
133 |     for meta_author in meta_authors:
134 |         orig_filename = meta_author['title']
135 |         if orig_filename == os.path.split(fp)[1]:
136 |             author_name = meta_author['name']
137 |             #print(author_name)
138 |             structure_meta = meta_author['encoding']['state']
139 |             #book_dict['structure_meta'] = structure_meta
140 |             #final_file_dict['structure_meta'] = structure_meta
141 |             #book_dict['author_name'] = author_name
142 |             final_file_dict['author'] = author_name
143 |             break
144 | 
145 |     final_file_dict['text'] = books_list
146 | 
147 | 
148 |     author_dir, author_file = os.path.split(fp)[0], os.path.split(fp)[1]
149 |     author_file = author_file.replace('xml.', '')
150 |     opensource_dir = os.path.split(author_dir)[0]
151 |     perseus_root = os.path.split(opensource_dir)[0]
152 |     # next write new perseus dir and put in there; check if present
153 |     cltk_perseus_dir = 'cltk_formatted'
154 |     cltk_perseus_path = os.path.expanduser(os.path.join(perseus_root, cltk_perseus_dir, author_name.casefold() + '_' + author_file))
155 |     print('Wrote new file to: "{}".'.format(cltk_perseus_path))
156 |     try:
157 |         dict_to_file(final_file_dict, cltk_perseus_path)
158 |     except FileNotFoundError:
159 |         _dir = os.path.split(cltk_perseus_path)[0]
160 |         os.mkdir(_dir)
161 |         dict_to_file(final_file_dict, cltk_perseus_path)
162 | 
163 | 
164 | if __name__ == "__main__":
165 |     fp = '/Users/kyle/cltk_data/latin/text/latin_text_perseus/Ammianus/opensource/amm_lat.xml.json'
166 |     book_chapter_convert(fp)
167 | 
168 | 


--------------------------------------------------------------------------------
/convert_tei_json_to_simple_json/book_line.py:
--------------------------------------------------------------------------------
  1 | """Take the JSON conversion of the original Perseus XML, then convert it into
  2 |  easier-to-parse JSON.
  3 | 
  4 | TODO: Perhaps get full author name and work name out of XML.
  5 | """
  6 | 
  7 | import json
  8 | import os
  9 | import sys
 10 | 
 11 | 
 12 | def file_to_dict(fp):
 13 |     """Open a json file and return Python dict."""
 14 |     with open(os.path.expanduser(fp)) as fo:
 15 |         return json.load(fo)
 16 | 
 17 | 
 18 | def dict_to_file(obj, fp):
 19 |     """Write dict to json file."""
 20 |     with open(os.path.expanduser(fp), 'w') as fo:
 21 |         json.dump(obj, fo)
 22 | 
 23 | 
 24 | def book_line_convert(fp):
 25 |     """Take filepath, try to make new file.
 26 |     {'author': 'Vergil',
 27 |      'text': [
 28 |        {'book': 1,
 29 |         'line': ['aaaaa', 'bbbbb', 'cccc']
 30 |         }
 31 |      ]
 32 |     }
 33 |     """
 34 |     final_file_dict = {}
 35 |     text_books_list = []
 36 |     file_dict = file_to_dict(fp)
 37 | 
 38 |     tei = file_dict['TEI.2']  # dict
 39 |     text = tei['text']  # dict
 40 |     header = tei['teiHeader']  # dict
 41 | 
 42 |     # Get work's title, add to final dict
 43 |     title_list = header['fileDesc']['titleStmt']['title']
 44 |     for obj in title_list:
 45 |         if type(obj) is str:
 46 |             title_name = obj
 47 |             final_file_dict['title'] = title_name
 48 |             break
 49 |         try:
 50 |             if obj['@type'] == 'work':
 51 |                 title_name = obj['#text']
 52 |                 final_file_dict['title'] = title_name
 53 |                 break
 54 |         except KeyError:
 55 |             raise
 56 | 
 57 |     encoding = header['encodingDesc']  # dict
 58 |     body = text['body']  # dict
 59 |     div1 = body['div1']  # list of dict
 60 |     #print(len(div1))  # eg 12 for Aeneid, 24 for Iliad
 61 |     for div1_dict in div1:
 62 |         book_object = {}
 63 |         text_lines = []
 64 |         milestone = div1_dict['milestone']  # list, not useful
 65 |         _type = div1_dict['@type']  # str, 'Book'
 66 |         book_number = int(div1_dict['@n'])  # str cast as int
 67 |         div1_dict_list = div1_dict['l']  # list of str or dict
 68 |         for counter, div1_dict_list_object in enumerate(div1_dict_list, start=1):
 69 |             if type(div1_dict_list_object) is dict:
 70 |                 try:
 71 |                     div1_dict_list_object_number = div1_dict_list_object['@n']  # str
 72 |                 except KeyError:
 73 |                     div1_dict_list_object_number = None
 74 |                 try:
 75 |                     div1_dict_list_object_milestone = div1_dict_list_object['milestone']  # dict, eg Aen and Il: {'@ed': 'P', '@unit': 'para'}
 76 |                 except KeyError:
 77 |                     div1_dict_list_object_milestone = None
 78 |                 div1_dict_list_object_text = div1_dict_list_object['#text']  # the actual text
 79 |                 div1_dict_list_object = div1_dict_list_object_text  # str
 80 |             else:
 81 |                 pass
 82 |             #print(book_number, counter, div1_dict_list_object)
 83 |             text_lines.append(div1_dict_list_object)
 84 |         book_object['text'] = text_lines
 85 |         book_object['book'] = book_number
 86 | 
 87 |         # Get author name from 'latin_key.json'
 88 |         key_fp = os.path.expanduser('~/cltk_data/latin/text/latin_text_perseus/latin_key.json')
 89 |         with open(key_fp) as fo:
 90 |             meta_authors = json.load(fo)
 91 |         for meta_author in meta_authors:
 92 |             orig_filename = meta_author['title']
 93 |             if orig_filename == os.path.split(fp)[1]:
 94 |                 author_name = meta_author['name']
 95 |                 #structure_meta = meta_author['encoding']['state']
 96 |                 #book_object['structure_meta'] = structure_meta
 97 |                 #book_object['author_name'] = author_name
 98 |                 final_file_dict['author'] = author_name
 99 |                 break
100 | 
101 |         text_books_list.append(book_object)
102 |     #print(len(text_books_list))  # eg 12 for Aen, 4 for Georgics, 24 for Od
103 | 
104 |     final_file_dict['text'] = text_books_list
105 | 
106 |     author_dir, author_file = os.path.split(fp)[0], os.path.split(fp)[1]
107 |     author_file = author_file.replace('xml.', '')
108 |     opensource_dir = os.path.split(author_dir)[0]
109 |     perseus_root = os.path.split(opensource_dir)[0]
110 |     # next write new perseus dir and put in there; check if present
111 |     cltk_perseus_dir = 'cltk_formatted'
112 |     cltk_perseus_path = os.path.expanduser(os.path.join(perseus_root, cltk_perseus_dir, author_file))
113 |     print('Wrote new file to: "{}".'.format(cltk_perseus_path))
114 |     try:
115 |         dict_to_file(final_file_dict, cltk_perseus_path)
116 |     except FileNotFoundError:
117 |         _dir = os.path.split(cltk_perseus_path)[0]
118 |         os.mkdir(_dir)
119 |         dict_to_file(final_file_dict, cltk_perseus_path)
120 | 
121 | if __name__ == "__main__":
122 | 
123 |     examples_files = ['~/cltk_data/latin/text/latin_text_perseus/Vergil/opensource/verg.a_lat.xml.json',
124 |                       #'~/cltk_data/latin/text/latin_text_perseus/Vergil/opensource/verg.ecl_lat.xml.json',  # KeyError: 'milestone'
125 |                       '~/cltk_data/latin/text/latin_text_perseus/Vergil/opensource/verg.g_lat.xml.json',
126 |                       '~/cltk_data/latin/text/latin_text_perseus/Ovid/opensource/ovid.met_lat.xml.json',
127 |                       #'~/cltk_data/latin/text/latin_text_perseus/Ovid/opensource/ovid.am_lat.xml.json',  # KeyError: 'body'
128 |                       #'~/cltk_data/latin/text/latin_text_perseus/Ovid/opensource/ovid.fast_lat.xml.json',  # KeyError: 'milestone'
129 |                       #'~/cltk_data/latin/text/latin_text_perseus/Ovid/opensource/ovid.ibis_lat.xml.json',  # TypeError: string indices must be integers
130 |                       #'~/cltk_data/latin/text/latin_text_perseus/Ovid/opensource/ovid.pont_lat.xml.json',  # KeyError: 'milestone'
131 |                       #'~/cltk_data/latin/text/latin_text_perseus/Ovid/opensource/ovid.tr_lat.xml.json',  # KeyError: 'milestone'
132 |                       '~/cltk_data/greek/text/greek_text_perseus/Homer/opensource/hom.il_gk.xml.json',
133 |                       '~/cltk_data/greek/text/greek_text_perseus/Homer/opensource/hom.od_gk.xml.json'
134 |                       ]
135 | 
136 |     for fp in examples_files:
137 |         book_line_convert(fp)


--------------------------------------------------------------------------------
/convert_tei_json_to_simple_json/convert_all_perseus_xml.py:
--------------------------------------------------------------------------------
 1 | """Look for all Perseus files, then try to convert with available converters.
 2 |  If error rises, then try another converter.
 3 | 
 4 | Outputs to: '~/cltk_data/greek/text/greek_text_perseus/cltk_formatted' and
 5 |  '~/cltk_data/latin/text/latin_text_perseus/cltk_formatted'.
 6 | """
 7 | 
 8 | import os
 9 | import sys
10 | 
11 | from book_line import book_line_convert
12 | from book_chapter import book_chapter_convert
13 | 
14 | 
15 | def os_walk(fp, ending='_lat.xml.json'):
16 |     """Recursively find files in path."""
17 |     for dir_path, dir_names, files in os.walk(fp):  # pylint: disable=W0612
18 |         for name in files:
19 |             if name.endswith(ending):
20 |                 yield os.path.join(dir_path, name)
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     perseus_dirs = ['~/cltk_data/latin/text/latin_text_perseus/', '~/cltk_data/greek/text/greek_text_perseus/']
25 |     #perseus_dirs = ['~/cltk_data/latin/text/latin_text_perseus/']
26 |     xml_converter = [book_line_convert, book_chapter_convert]
27 |     success_count = 0
28 |     fail_count = 0
29 |     for perseus_dir in perseus_dirs:
30 |         for fp in os_walk(os.path.expanduser(perseus_dir)):
31 |             for converter in xml_converter:
32 |                 try:
33 |                     converter(fp)
34 |                     success_count += 1
35 |                     break
36 |                 except:
37 |                     pass
38 |             fail_count += 1
39 |     print('Sucess:', success_count)
40 |     print('Fail:', fail_count)


--------------------------------------------------------------------------------
/gunicorn_start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | NAME="CLTK API"
 4 | SOCKFILE=/home/cltk/cltk_api/binding.sock 
 5 | NUM_WORKERS=4
 6 | 
 7 | echo "Starting $NAME"
 8 | 
 9 | #activate virtual environment
10 | source /home/cltk/venv/bin/activate
11 | cd /home/cltk/cltk_api
12 | 	
13 | # Start gunicorn server
14 | exec gunicorn api_json:app -b 127.0.0.1:5000 \
15 |   --workers $NUM_WORKERS \
16 |   --bind=unix:$SOCKFILE
17 | 


--------------------------------------------------------------------------------
/metadata/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/__init__.py


--------------------------------------------------------------------------------
/metadata/commentary/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/commentary/__init__.py


--------------------------------------------------------------------------------
/metadata/criticism/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/criticism/__init__.py


--------------------------------------------------------------------------------
/metadata/criticism/criticism.py:
--------------------------------------------------------------------------------
 1 | """Ingest citations for criticism"""
 2 | import time
 3 | import string
 4 | import random
 5 | import urllib.request
 6 | from bs4 import BeautifulSoup
 7 | from cltk_api.util.db import mongo
 8 | 
 9 | class Criticism:
10 | 
11 | 	def __init__(self, dbname):
12 | 		"""Setup db connection to mongo"""
13 | 		self.dbname = dbname
14 | 		self.punctuation_transtable = {ord(c): None for c in string.punctuation}
15 | 
16 | 		return
17 | 
18 | 	def ingest(self, line):
19 | 		"""Ingest citation data to the database and mark done for later processing"""
20 | 
21 | 		try:
22 | 
23 | 			cites = self.search_jstor(line)
24 | 			for cite in cites:
25 | 				cite['line'] = line
26 | 				self.save(cite)
27 | 
28 | 		except:
29 | 			return False
30 | 
31 | 
32 | 		return True
33 | 
34 | 	def search_jstor(self, line):
35 | 		"""Search for line via JSTOR API"""
36 | 		cites = []
37 | 		pages = []
38 | 
39 | 		# Make URL to query
40 | 		sline = line['line']['text'].translate(self.punctuation_transtable).lower()
41 | 		sline = sline.replace(" ","+").lower()
42 | 		sline = sline.replace("—", "")
43 | 
44 | 		url = "http://dfr.jstor.org/?view=text&qk0=ft&qw0=1.0&qv0=%22" + sline + "%22&qf0=any&sk=ca"
45 | 
46 | 		# Get the page
47 | 		res = urllib.request.urlopen(url)
48 | 		html = res.read()
49 | 		soup = BeautifulSoup(html)
50 | 		pagination = soup.select(".pagination a")
51 | 		cites.extend(self._parse_jstor_page(soup))
52 | 
53 | 		# Get the paginated results
54 | 		for elem in pagination:
55 | 			#If elem doesn't have classes "prevnextlink" and "currentpage"
56 | 			try:
57 | 				if "prevnextlink" not in elem['class'] and "currentpage" not in elem['class']:
58 | 					pages.append("http://dfr.jstor.org/" + elem['href'])
59 | 			except:
60 | 				try:
61 | 					pages.append("http://dfr.jstor.org/" + elem['href'])
62 | 				except:
63 | 					pass
64 | 
65 | 		time.sleep(random.randint( 2, 5 ))
66 | 		for i, page_link in enumerate(pages):
67 | 			print(" -- querying page", i + 2)
68 | 			res = urllib.request.urlopen(page_link)
69 | 			html = res.read()
70 | 			soup = BeautifulSoup(html)
71 | 			cites.extend(self._parse_jstor_page(soup))
72 | 			time.sleep(random.randint( 2, 5 ))
73 | 
74 | 		return cites
75 | 
76 | 	def _parse_jstor_page(self, soup):
77 | 		c = []
78 | 		res = soup.select("ul.results_item")
79 | 		for el in res:
80 | 			c.append({
81 | 					'title' : el.select(".title")[0].text,
82 | 					'author' : el.select(".author")[0].text,
83 | 					'cite' : el.select('li')[2].text
84 | 				})
85 | 
86 | 		return c
87 | 
88 | 	def save(self, cite):
89 | 		"""Save the citation to the db for processing"""
90 | 		db = mongo(self.dbname)
91 | 		db.criticism.insert(cite)
92 | 		return
93 | 


--------------------------------------------------------------------------------
/metadata/definition/_init_.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/definition/_init_.py


--------------------------------------------------------------------------------
/metadata/definition/views.py:
--------------------------------------------------------------------------------
 1 | from flask_restful import Resource
 2 | import json
 3 | import os
 4 | 
 5 | # File name suffix for the json files
 6 | DATA_FILE_SUFFFIX = "-analyses.json"
 7 | 
 8 | def get_cltk_treebank_dir(lang, corpus='perseus'):
 9 |     """Take relative filepath, return absolute"""
10 |     cltk_home = os.path.expanduser('~/cltk_data')
11 |     treebank_path =  lang.casefold() + '_treebank_' + corpus;
12 |     treebank_dir = os.path.join(cltk_home, lang.casefold(), 'treebank', treebank_path, treebank_path)
13 |     return treebank_dir
14 | 
15 | class Definition(Resource):
16 | 
17 | 	'''
18 | 	GET 	/lang/<string:lang>/define/<string:word>
19 | 			Return the available definitions for a word of given language
20 | 	'''
21 | 	def get(self, lang, word):
22 | 		# File name would something like "latin-analyses.json"
23 | 		filename = lang + DATA_FILE_SUFFFIX
24 | 		_dir = get_cltk_treebank_dir(lang)
25 | 		file = os.path.join(_dir, filename)
26 | 		with open(file, "r") as infile:
27 | 			word_list = json.load(infile)
28 | 		try:
29 | 			return word_list[word]
30 | 		except KeyError as e:
31 | 			return []
32 | 


--------------------------------------------------------------------------------
/metadata/entities/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/entities/__init__.py


--------------------------------------------------------------------------------
/metadata/entities/dbpedia.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Interface with DBpedia 
 3 | """
 4 | 
 5 | class DBpedia:
 6 | 
 7 | 	def __init__(self):
 8 | 
 9 | 
10 | 		return


--------------------------------------------------------------------------------
/metadata/entities/entity.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A class for working with Entities retrieved from the NER core functionality
 3 | of the CLTK
 4 | """
 5 | 
 6 | import string
 7 | import os
 8 | import json
 9 | import re
10 | import random
11 | from time import sleep
12 | from urllib.request import urlopen, urlretrieve
13 | from urllib import error
14 | from bs4 import BeautifulSoup
15 | from cltk_api.metadata.entities.wikipedia import Wikipedia
16 | 
17 | 
18 | class Entity:
19 | 
20 | 	def __init__(self, name_english, name_original):
21 | 
22 | 		self.name_english = name_english
23 | 		self.name_original = name_original
24 | 		self.punctuation_transtable = {ord(c): None for c in string.punctuation}
25 | 
26 | 		# External resources
27 | 		self.wikipedia_entity = {}
28 | 
29 | 		return
30 | 
31 | 	def fetch_wikipedia(self):
32 | 		"""
33 | 		Fetch metadata, images, and summaries about an entity from Wikipedia
34 | 		"""
35 | 		self.wikipedia_entity = Wikipedia.query(self.name_english)
36 | 
37 | 		return
38 | 


--------------------------------------------------------------------------------
/metadata/entities/pleiades.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | Interface with Pleiades
 4 | """
 5 | 
 6 | class Pleiades:
 7 | 
 8 | 	def __init__(self):
 9 | 
10 | 
11 | 		return
12 | 


--------------------------------------------------------------------------------
/metadata/entities/viaf.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Interface with Viaf
 3 | """
 4 | 
 5 | class VIAF:
 6 | 
 7 | 	def __init__(self):
 8 | 
 9 | 
10 | 		return
11 | 


--------------------------------------------------------------------------------
/metadata/entities/wikipedia.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A class for interfacing with Wikipedia to associate metadata and images to
 3 | the CLTK named entities
 4 | 
 5 | Example usage:
 6 | >>> from cltk_api.metadata.entities.wikipedia import Wikipedia
 7 | >>> Wikipedia.query("Aeneas")
 8 | {
 9 | 	'name': 'Aeneas',
10 | 	'summary': 'In Greco-Roman mythology, Aeneas (/ᵻˈniːəs/; Greek: Αἰνείας, Aineías, possibly derived from Greek αἰνή meaning "praised") was a Trojan hero, the son of the prince Anchises and the goddess Venus (Aphrodite). His father was the second cousin of King Priam of Troy, making Aeneas Priam\'s second cousin, once removed. He is a character in Greek mythology and is mentioned in Homer\'s Iliad. Aeneas receives full treatment in Roman mythology, most extensively in Virgil\'s Aeneid where he is an ancestor of Romulus and Remus. He became the first true hero of Rome.'
11 | 	'images': ['https://upload.wikimedia.org/wikipedia/commons/c/c0/Denier_frapp%C3%A9_sous_C%C3%A9sar_c%C3%A9l%C3%A9brant_le_mythe_d%27En%C3%A9e_et_d%27Anchise.jpg', 'https://upload.wikimedia.org/wikipedia/commons/a/aa/Capitoline_she-wolf_Musei_Capitolini_MC1181.jpg', 'https://upload.wikimedia.org/wikipedia/commons/9/9f/Aineias_Ankhises_Louvre_F118.jpg', 'https://upload.wikimedia.org/wikipedia/commons/3/3c/William_Blake_Richmond_-_Venus_and_Anchises_-_Google_Art_Project.jpg', 'https://upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg', 'https://upload.wikimedia.org/wikipedia/commons/2/2f/B._PINELLI%2C_Enea_e_il_Tevere.jpg', 'https://upload.wikimedia.org/wikipedia/commons/7/76/Aeneas_and_Turnus.jpg', 'https://upload.wikimedia.org/wikipedia/commons/e/e0/Gu%C3%A9rin_%C3%89n%C3%A9e_racontant_%C3%A0_Didon_les_malheurs_de_la_ville_de_Troie_Louvre_5184.jpg', 'https://upload.wikimedia.org/wikipedia/commons/a/a8/Venus_as_Huntress_Appears_to_Aeneas.jpg', 'https://upload.wikimedia.org/wikipedia/en/4/4a/Commons-logo.svg', 'https://upload.wikimedia.org/wikipedia/commons/f/f7/Aeneas%27_Flight_from_Troy_by_Federico_Barocci.jpg'],
12 | 	}
13 | 
14 | """
15 | 
16 | import wikipedia
17 | 
18 | class Wikipedia:
19 | 
20 | 	@staticmethod
21 | 	def query(entity_name):
22 | 		"""
23 | 		Retrieve data from Wikipedia for a given input entity name
24 | 		:return wikipedia_entity: dict
25 | 		"""
26 | 
27 | 		# Return a wikipedia entity dictionary
28 | 		wikipedia_entity = {}
29 | 
30 | 		# Get a list of results from wikipedia for the input entity name
31 | 		entity_results = wikipedia.search(entity_name, suggestion=True)
32 | 
33 | 		# For the moment, just use the first wikipedia entry
34 | 		# Perhaps work in wikipedia.suggest in the future
35 | 		try:
36 | 			wikipedia_entity['name'] = entity_results[0]
37 | 
38 | 			# Get the summary
39 | 			wikipedia_entity['summary'] = wikipedia.summary(wikipedia_entity['name'])
40 | 
41 | 			# Get the page and images
42 | 			wikipedia_page = wikipedia.page(wikipedia_entity['name'])
43 | 			wikipedia_entity['images'] = wikipedia_page.images
44 | 
45 | 
46 | 			# Get anything else we might need...
47 | 	
48 | 		except:
49 | 			wikipedia_entity = {}
50 | 		finally:
51 | 			return wikipedia_entity
52 | 			
53 | 


--------------------------------------------------------------------------------
/metadata/media/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/media/__init__.py


--------------------------------------------------------------------------------
/metadata/pos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/pos/__init__.py


--------------------------------------------------------------------------------
/metadata/pos/constants.py:
--------------------------------------------------------------------------------
1 | # list of available POS tagging methods in CLTK
2 | POS_METHODS = {'greek': ['unigram', 'bigram', 'trigram', 'ngram123', 'tnt'],
3 |                'latin': ['unigram', 'bigram', 'trigram', 'ngram123', 'tnt']}
4 | DEFAULT_POS_METHOD = 'ngram123'
5 | 


--------------------------------------------------------------------------------
/metadata/pos/views.py:
--------------------------------------------------------------------------------
 1 | from .constants import POS_METHODS, DEFAULT_POS_METHOD
 2 | from cltk.tag.pos import POSTag
 3 | from flask_restful import Resource, reqparse
 4 | 
 5 | """
 6 |     GET     /core/pos                           View available POS tagging methods
 7 | 
 8 |     POST    /core/pos                           Return POS tags for the given string
 9 |             Data: {'string': string to tag,     using the specified langauge and
10 |                    'lang':   language           tagging method.
11 |                    'method': tagging method}
12 | """
13 | class POSTagger(Resource):
14 |     def get(self):
15 |         return {'methods': POS_METHODS}
16 | 
17 |     def post(self):
18 |         self.reqparse = reqparse.RequestParser()
19 |         self.reqparse.add_argument('string', required=True)
20 |         self.reqparse.add_argument('lang', required=True, choices=POS_METHODS.keys())
21 |         self.reqparse.add_argument('method', required=False,
22 |                                    default=DEFAULT_POS_METHOD)
23 | 
24 |         args = self.reqparse.parse_args()
25 |         string = args['string']
26 |         lang = args['lang']
27 |         method = args['method']
28 | 
29 |         if method not in POS_METHODS[lang]:
30 |             return {'message': {'method': method + ' is not a valid choice'}}
31 | 
32 |         tagger = POSTag(lang)
33 |         tagged = []
34 |         if method == 'unigram':
35 |             tagged = tagger.tag_unigram(string)
36 |         elif method == 'bigram':
37 |             tagged = tagger.tag_bigram(string)
38 |         elif method == 'trigram':
39 |             tagged = tagger.tag_trigram(string)
40 |         elif method == 'ngram123':
41 |             tagged = tagger.tag_ngram_123_backoff(string)
42 |         elif method == 'tnt':
43 |             tagged = tagger.tag_tnt(string)
44 | 
45 |         return {'tags': [{'word': word, 'tag': tag}
46 |                          if tag is not None else {'word': word, 'tag': 'None'}
47 |                          for word, tag in tagged]}
48 | 


--------------------------------------------------------------------------------
/metadata/prosody/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/prosody/__init__.py


--------------------------------------------------------------------------------
/metadata/prosody/scansion.py:
--------------------------------------------------------------------------------
  1 | """
  2 | A class for predicting scansion in a line of Latin dactylic hexameter
  3 | 
  4 | May possibly be reworked with CLTK prosody modules in the future.
  5 | """
  6 | 
  7 | 
  8 | from cltk.util.syllabifier import Syllabifier
  9 | import string
 10 | import re
 11 | 
 12 | Patterns = {
 13 | 		'dactylic_hexameter' : {
 14 | 			'n_feet' : 6,
 15 | 			'feet' : [[1,0,0],[1,1]],
 16 | 			'pattern' :[
 17 | 						[[1,0,0],[1,1]],
 18 | 						[[1,0,0],[1,1]],
 19 | 						[[1,0,0],[1,1]],
 20 | 						[[1,0,0],[1,1]],
 21 | 						[[1,0,0],[1,1]],
 22 | 						[[1,1]]
 23 | 					]
 24 | 		},
 25 | 		'elegiac_pentameter' : {
 26 | 			'n_feet' : 5,
 27 | 			'feet' : [[1,0,0],[1,1]],
 28 | 			'pattern' :[
 29 | 						[[1,0,0],[1,1]],
 30 | 						[[1,0,0],[1,1]],
 31 | 						[[1]],
 32 | 						[[1,0,0]],
 33 | 						[[1,0,0]],
 34 | 						[[1]]
 35 | 					]
 36 | 		}
 37 | 
 38 | 	}
 39 | 
 40 | Latin = {
 41 | 
 42 | 		'diphthongs' : ["ae", "au", "ei", "eu", "oe"],
 43 | 		'two_consonants' : ["x", "z"],
 44 | 		'digraphs' : ["ch", "ph", "th", "qu", "gu", "su"],
 45 | 		'mute_consonants_and_f' : ['b', 'c', 'd', 'g', 'p', 't', 'f'],
 46 | 		'liquid_consonants' : ['l', 'r'],
 47 | 		'vowels' : [
 48 | 						'a', 'e', 'i', 'o', 'u',
 49 | 						'á', 'é', 'í', 'ó', 'ú',
 50 | 						'æ', 'œ',
 51 | 						'ǽ',  # no accented œ in unicode?
 52 | 						'y'   # y is treated as a vowel; not native to Latin but useful for words borrowed from Greek
 53 | 					]
 54 | 
 55 | 	}
 56 | 
 57 | class Scansion(object):
 58 | 	"""Predict scansion for a line of classical Greek or Latin poetry"""
 59 | 
 60 | 	def __init__(self, patterns=Patterns, language=Latin):
 61 | 
 62 | 		self.patterns = patterns
 63 | 		self.language = language
 64 | 		self.punctuation_transtable = {ord(c): " " for c in string.punctuation}
 65 | 		self.line = []
 66 | 
 67 | 		return
 68 | 
 69 | 	def scan(self, line, pattern="dactylic_hexameter"):
 70 | 		"""Input a line of poetry and receive an array of booleans indicating open or closed syllables"""
 71 | 
 72 | 		s = Syllabifier()
 73 | 		line_sylls = []
 74 | 		scansion = []
 75 | 		# Strip any punctuation and lower
 76 | 		line = line.translate(self.punctuation_transtable).lower()
 77 | 		line = line.replace("—", " ")
 78 | 
 79 | 
 80 | 		# Build list of line syllables
 81 | 		line = line.split()
 82 | 		for word in line:
 83 | 			if len( word ):
 84 | 				line_sylls.extend( s.syllabify( word ) )
 85 | 
 86 | 		# Build scansion for syllables, based on pattern
 87 | 		# If a syllable is not long, it is short
 88 | 		sylls_len = len(line_sylls)
 89 | 		for i, syll in enumerate(line_sylls):
 90 | 			scansion.append({
 91 | 					's' : syll,
 92 | 					'l' : 0
 93 | 				})
 94 | 
 95 | 			if i < sylls_len - 1 and self._is_elided( syll, line_sylls[ i + 1 ], line ):
 96 | 				scansion[i]['l'] = "-"
 97 | 				#scansion[i]['r'] = "elided"
 98 | 				continue
 99 | 
100 | 			elif self._long_by_nature( i, syll, line_sylls, line ):
101 | 				scansion[i]['l'] = 1
102 | 				#scansion[i]['r'] = "by nature"
103 | 				continue
104 | 
105 | 			elif i < sylls_len - 1 and self._long_by_position( syll, line_sylls[ i + 1 ] ):
106 | 				scansion[i]['l'] = 1
107 | 				#scansion[i]['r'] = "by position"
108 | 				continue
109 | 
110 | 		# For next step, remove elided syllables
111 | 		for syll in scansion:
112 | 			if syll['l'] == "-":
113 | 				scansion.remove(syll)
114 | 
115 | 		# Compare scansion against selected pattern
116 | 		scansion = self._scan_against_pattern( pattern, scansion, line )
117 | 
118 | 		return scansion
119 | 
120 | 	def _scan_against_pattern(self, pattern, scansion, line, depth=0):
121 | 		"""Make judgements about feet regularized to pattern"""
122 | 
123 | 		# Load permissible feet for scansion pattern
124 | 		feet = self.patterns[ pattern ]['feet']
125 | 		n_feet = self.patterns[ pattern ]['n_feet']
126 | 		ft_cos = self._find_feet_commonalities(feet)
127 | 		new_scansion = []
128 | 
129 | 		if depth == 1:
130 | 			scansion = self._check_synizesis(scansion)
131 | 
132 | 		# Make a copy of the input scansion
133 | 		prev_scansion = scansion[:]
134 | 
135 | 		# Primary loop for checking scansion against pattern feet
136 | 		while len(scansion) > 0:
137 | 			match = False
138 | 
139 | 			# First, check if a foot matches the start of syllable list
140 | 			for foot in feet:
141 | 				#foot.reverse()
142 | 				has_elided = 0
143 | 				sylls = scansion[:len(foot)]
144 | 
145 | 				# If the syllable list starts with a foot
146 | 				if self._comp_syll_foot( sylls, foot ):
147 | 
148 | 					# Add the syllables to the scansion foot
149 | 					#sylls.reverse()
150 | 					new_scansion.append(sylls)
151 | 
152 | 					# And remove the syllables from the original scansion list
153 | 					for sy in sylls:
154 | 						scansion.remove(sy)
155 | 
156 | 					match = True
157 | 					break
158 | 
159 | 			# If we don't have a match from the feet in our allowed feet
160 | 			if not match:
161 | 				# Apply common rules among the feet to syllables at start of list
162 | 				for c in ft_cos:
163 | 					scansion[c['i']]['l'] = c['val']
164 | 					#scansion[c['i']]['r'] = "commonality between feet"
165 | 
166 | 				# If short between two longs (and no iambic foot from earlier loop), make that short long
167 | 				if len(scansion) > 2:
168 | 					if scansion[0]['l'] == 1 and scansion[2]['l'] == 1:
169 | 						scansion[1]['l'] = 1
170 | 						#scansion[1]['r'] = "scansion context"
171 | 				# If long short at the end of the line (and no iambic foot), make that short long
172 | 				elif len(scansion) == 2:
173 | 					scansion[1]['l'] = 1
174 | 					#scansion[1]['r'] = "end of line"
175 | 
176 | 				# Catch the remainder to prevent inf loop
177 | 				else:
178 | 					# Add the syllables to the scansion foot
179 | 					new_scansion.append(scansion)
180 | 					# And remove the syllables from the original scansion list
181 | 					scansion = []
182 | 
183 | 		# If there's more feet in the new scansion than are allowed in the meter
184 | 		scan_len = len(new_scansion)
185 | 		new_scansion = {'scansion':new_scansion}
186 | 		if n_feet < scan_len:
187 | 			new_scansion['error'] = "too many"
188 | 			if depth == 0:
189 | 				new_scansion = self._scan_against_pattern(pattern, prev_scansion, line, depth + 1)
190 | 
191 | 		elif n_feet > scan_len:
192 | 			new_scansion['error'] = "too few"
193 | 			if depth == 0:
194 | 				new_scansion = self._scan_against_pattern(pattern, prev_scansion, line, depth + 1)
195 | 
196 | 		# Return the scansion with feet in the correct order
197 | 		#new_scansion.reverse()
198 | 		return new_scansion
199 | 
200 | 	def _comp_syll_foot(self, sylls, pattern):
201 | 		"""Check if the possible pattern for the foot matches syllables"""
202 | 		match = []
203 | 		if len(sylls) == len(pattern):
204 | 			for p_index, value in enumerate(pattern):
205 | 				if value == sylls[ p_index ]['l']:
206 | 					match.append(True)
207 | 				else:
208 | 					match.append(False)
209 | 
210 | 		if len(match):
211 | 			return all(item==True for item in match)
212 | 		else:
213 | 			return False
214 | 
215 | 	def _find_feet_commonalities(self, scansion_feet):
216 | 		"""Find the commonalities between the feet: e.g. dactyl and spondee have commonality of long syllable in first position"""
217 | 		commonalities = []
218 | 		for i_foot, foot in enumerate(scansion_feet):
219 | 			for i, val in enumerate(foot):
220 | 				common = True
221 | 
222 | 				for i_comp_foot, comp_foot in enumerate(scansion_feet):
223 | 					if i >= len(comp_foot) or comp_foot[i] != val:
224 | 						common = False
225 | 
226 | 				if common == True:
227 | 					c = {'i':i, 'val':val}
228 | 					included = False
229 | 					for comp_c in commonalities:
230 | 						if c['i'] == comp_c['i'] and c['val'] == comp_c['val']:
231 | 							included = True
232 | 					if not included:
233 | 						commonalities.append({'i':i,'val':val})
234 | 
235 | 		return commonalities
236 | 
237 | 	def _is_elided(self, syll, next_syll, line):
238 | 		"""Is the syllable elided based its ending and the next syllable's beginning"""
239 | 		is_elided = False
240 | 		line_len = len(line)
241 | 
242 | 		# Only check the syllables that are at word boundaries (not interior syllables)
243 | 		for i, word in enumerate(line):
244 | 			if word.endswith(syll):
245 | 				if 	(
246 | 						(
247 | 						# If the target syllable ends with 'm' or a vowel
248 | 							syll.endswith("m")
249 | 						or self._is_vowel( syll[-1] )
250 | 						)
251 | 					and
252 | 						(
253 | 						# And if the next word exists and it starts with the next syllable
254 | 							i < line_len - 1
255 | 						and line[i + 1].startswith( next_syll )
256 | 						)
257 | 					):
258 | 
259 | 						# And next word starts with a vowel or 'h'
260 | 						if	(
261 | 								self._is_vowel( next_syll[0] )
262 | 							or next_syll[0] == "h"
263 | 							):
264 | 
265 | 							# And if the next word starts with an i, and the i isn't a consonant
266 | 							if next_syll[0] == "i":
267 | 								if len( next_syll ) > 1 and not self._is_vowel( next_syll[1] ):
268 | 									is_elided = True
269 | 								elif len( next_syll ) == 1:
270 | 									is_elided = True
271 | 
272 | 							else:
273 | 								is_elided = True
274 | 
275 | 		return is_elided
276 | 
277 | 	def _long_by_nature(self, i, syll, line_sylls, line):
278 | 		"""Is the syllable long by nature"""
279 | 		is_long = False
280 | 		# Long_ends could also contain o, i, and u
281 | 		long_ends = ["as","es","os"]
282 | 		syll = syll.lstrip("qu")
283 | 
284 | 		# If it contains a diphthong
285 | 		for diphthong in self.language['diphthongs']:
286 | 			if diphthong in syll:
287 | 				is_long = True
288 | 				break
289 | 
290 | 		if not is_long:
291 | 			line_len = len(line)
292 | 			line_sylls_len = len(line_sylls)
293 | 
294 | 			# If it's a final o, i, u, as, es, or os
295 | 			for e in long_ends:
296 | 				if syll.endswith(e):
297 | 
298 | 					# Except tibi / mihi
299 | 					# If it has a preceding syllable
300 | 					if i > 0:
301 | 						if syll == "hi" and line_sylls[ i - 1 ] == "mi":
302 | 							return is_long
303 | 
304 | 						elif syll == "bi" and line_sylls[ i - 1 ] == "ti":
305 | 							return is_long
306 | 
307 | 					# Ensure the syll is an end of a word
308 | 					for l_i, word in enumerate(line):
309 | 						if word.endswith(syll):
310 | 
311 | 							# If there's a next word and next syllable
312 | 							if l_i < line_len - 1:
313 | 								# If there's a next syllable
314 | 								if i < line_sylls_len - 1:
315 | 									if word.endswith( syll ) and line[ l_i + 1 ].startswith( line_sylls[ i + 1 ] ):
316 | 										is_long = True
317 | 
318 | 							# Else, if there's not another syllable after it in the line, mark as long
319 | 							else:
320 | 								if i == line_sylls_len - 1 and word.endswith( syll ):
321 | 									is_long = True
322 | 
323 | 		return is_long
324 | 
325 | 	def _long_by_position(self, syll, next_syll):
326 | 		"""Is the syllable long by position, with two or more consonants between its vowel and the next"""
327 | 		is_long = False
328 | 
329 | 		if syll.endswith("x") or next_syll.startswith("x"):
330 | 			is_long = True
331 | 
332 | 		else:
333 | 			syll_cvs = self._return_consonants_vowels( syll )
334 | 			next_syll_cvs = self._return_consonants_vowels( next_syll )
335 | 			if ( syll_cvs.lstrip("c").count("c") + next_syll_cvs.rstrip("c").count("c") ) >= 2:
336 | 				is_long = True
337 | 
338 | 			#print(syll, syll_cvs, is_long)
339 | 			#print(next_syll, next_syll_cvs)
340 | 
341 | 		return is_long
342 | 
343 | 	def _return_consonants_vowels(self, input_string):
344 | 		"""Return a string of Cs and Vs for the consonants and vowels in the string"""
345 | 		cvs = ''
346 | 
347 | 
348 | 		for i, char in enumerate(input_string):
349 | 			has_prev_char = i > 0
350 | 			has_next_char = i < len(input_string) - 1
351 | 
352 | 			# First check for vowels with the u and i exceptions
353 | 			if self._is_vowel(char) and char not in ["u", "i"]:
354 | 				cvs = cvs + "v"
355 | 
356 | 			# If it's a 'u', it's a vowel unless preceded by a q, g, or s
357 | 			elif char == "u":
358 | 				if has_prev_char:
359 | 					if not (
360 | 							not has_next_char
361 | 						and input_string[ i - 1 ] in ["q","g","s"]
362 | 						):
363 | 						cvs = cvs + "v"
364 | 				else:
365 | 					cvs = cvs + "v"
366 | 
367 | 			# Handle the i/y/j exception
368 | 			elif char == "i":
369 | 				if has_next_char and i == 0 and self._is_vowel( input_string[ i + 1 ]):
370 | 					cvs = cvs + "c"
371 | 				else:
372 | 					cvs = cvs + "v"
373 | 
374 | 			# x and z are double consonants
375 | 			elif char in ["x","z"]:
376 | 				cvs = cvs + "cc"
377 | 
378 | 			# ch, ph, th are single
379 | 			elif has_prev_char and char == "h" and input_string[ i - 1 ] in ["c","p","t"]:
380 | 				pass
381 | 
382 | 			# mute followed by a liquid is single
383 | 			elif has_prev_char and self._is_liquid_consonant( char ) and self._is_mute_consonant_or_f( input_string[ i - 1 ] ):
384 | 				pass
385 | 
386 | 			elif char == "h":
387 | 				pass
388 | 
389 | 			# failing all of the above, it's a normal consonant
390 | 			else:
391 | 				cvs = cvs + "c"
392 | 
393 | 		return cvs
394 | 
395 | 	def _check_synizesis(self, scansion):
396 | 		new_scansion = []
397 | 		remove_next_syll = False
398 | 		for i, syll in enumerate(scansion):
399 | 
400 | 			len_scansion = len(scansion)
401 | 			has_next_syll = i < len_scansion - 1
402 | 			has_prev_syll = i > 0
403 | 
404 | 			if remove_next_syll:
405 | 				remove_next_syll = False
406 | 				continue
407 | 
408 | 			if has_next_syll:
409 | 				next_syll = scansion[i + 1]
410 | 				if syll["s"].endswith("u"):
411 | 					if next_syll["s"].startswith("u") or next_syll["s"].startswith("i") or next_syll["s"].startswith("e"):
412 | 						syll["s"] = syll['s'] + next_syll["s"][1:]
413 | 						remove_next_syll = True
414 | 
415 | 			if "uu" in syll['s']:
416 | 				syll['s'] = syll['s'].replace("uu","u")
417 | 			elif "ui" in syll['s']:
418 | 				syll['s'] = syll['s'].replace("ui","u")
419 | 			elif "ue" in syll['s']:
420 | 				syll['s'] = syll['s'].replace("ue","u")
421 | 
422 | 			new_scansion.append(syll)
423 | 
424 | 		return new_scansion[:]
425 | 
426 | 	def _is_consonant(self, char):
427 | 		"""Checks if char is in the list of vowels in the language"""
428 | 		return not char in self.language['vowels']
429 | 
430 | 	def _is_vowel(self, char):
431 | 		"""Checks if char is in the list of vowels in the language"""
432 | 		return char in self.language['vowels']
433 | 
434 | 	def _is_mute_consonant_or_f(self, char):
435 | 		"""Checks if char is in the mute_consonants_and_f list"""
436 | 		return char in self.language['mute_consonants_and_f']
437 | 
438 | 	def _is_liquid_consonant(self, char):
439 | 		"""Checks if char is in the mute_consonants_and_f list"""
440 | 		return char in self.language['liquid_consonants']
441 | 


--------------------------------------------------------------------------------
/metadata/prosody/scansion_to_html.py:
--------------------------------------------------------------------------------
 1 | """
 2 | For a given input string and scansion value, return a line of HTML with
 3 | <span>s around syllables and syllable-long and syllable-short classes
 4 | """
 5 | 
 6 | import re
 7 | import string
 8 | import sys
 9 | 
10 | class ScansionToHTML:
11 | 
12 | 	def __init__(self, line, scansion):
13 | 		"""
14 | 
15 | 		"""
16 | 
17 | 		return
18 | 
19 | 	def scansion_to_html(self, line, scansion):
20 | 		"""
21 | 		For a given input string and scansion, generate an HTML response of syllables wrapped in
22 | 		<span> elements with classes denoting long and short syllables.
23 |         :param line: str
24 |         :param scansion: Line scansion (needs to be reworked to be like the CLTK scansion)
25 | 		:return html_line: str (formatted HTML string)
26 | 		"""
27 | 
28 | 		while len( self.scansion ) > 0:
29 | 			foot = self.scansion[0]
30 | 			while len( foot ) > 0:
31 | 				syll = foot[0]
32 | 
33 | 				if self.line.lower().startswith( syll['s'] ):
34 | 					len_syll_s = len( syll['s'] )
35 | 
36 | 					if syll['l']:
37 | 						#long
38 | 						html_line += "<span class='scansion-syllable syllable-long'>" + self.line[0:len_syll_s] + "</span>"
39 | 
40 | 					else:
41 | 						#short
42 | 						html_line += "<span class='scansion-syllable syllable-short'>" + self.line[0:len_syll_s] + "</span>"
43 | 
44 | 					self.line = self.line[len_syll_s:]
45 | 
46 | 					# finally remove the syll
47 | 					foot.remove( syll )
48 | 
49 | 
50 | 				else:
51 | 					# skip one forward (spaces, punct, &c.)
52 | 					if len(self.line) > 0:
53 | 						html_line += self.line[0]
54 | 						self.line = self.line[1:]
55 | 					else:
56 | 						foot = []
57 | 						self.scansion = []
58 | 						print(" -- error with transfering to html for", self.line_orig, html_line)
59 | 						break
60 | 
61 | 			# If there's more scansion
62 | 			if len(self.scansion):
63 | 				# Remove the empty foot
64 | 				self.scansion.remove(foot)
65 | 
66 | 				# If scansion length is now no more
67 | 				if len(self.scansion) == 0:
68 | 					# add the remainder of line (final punctuation!!)
69 | 					html_line += self.line
70 | 
71 | 
72 | 		return html_line
73 | 


--------------------------------------------------------------------------------
/metadata/stem/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/stem/__init__.py


--------------------------------------------------------------------------------
/metadata/stem/views.py:
--------------------------------------------------------------------------------
 1 | from cltk.stem.latin.stem import Stemmer
 2 | from flask_restful import Resource
 3 | 
 4 | class Stem(Resource):
 5 |     """
 6 |     GET     /core/stem/<string:sentence>
 7 |             Takes sentence input and strips suffix using CLTK's core Stemmer
 8 |     """
 9 | 
10 |     def get(self, sentence):
11 |         stemmer = Stemmer()
12 |         return {'stemmed_output': stemmer.stem(sentence.lower())}
13 | 


--------------------------------------------------------------------------------
/metadata/text_reuse/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/text_reuse/__init__.py


--------------------------------------------------------------------------------
/metadata/tokenize/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/tokenize/__init__.py


--------------------------------------------------------------------------------
/metadata/translations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/translations/__init__.py


--------------------------------------------------------------------------------
/metadata/translations/map_translation.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 
  3 | Map a translation of a Latin document to the original Latin document
  4 | 
  5 | Must already have definitions ingested for this to work
  6 | 
  7 | """
  8 | 
  9 | 
 10 | import optparse
 11 | import pymongo
 12 | import re
 13 | import copy
 14 | import string
 15 | import numpy as np
 16 | from nltk.corpus import wordnet as wn
 17 | from nltk.corpus import stopwords
 18 | from nltk.stem.wordnet import WordNetLemmatizer
 19 | 
 20 | 
 21 | class MapTranslation:
 22 | 
 23 | 	def __init__(self, settings):
 24 | 
 25 | 
 26 | 		# What data to import and where to put it
 27 | 		self.trans_fname = settings.fname
 28 | 		self.work = settings.work
 29 | 		self.subwork = { 'n' : int( settings.subwork ) }
 30 | 		self.translators = [ settings.author ]
 31 | 		self.edition_slug = settings.author
 32 | 
 33 | 		# Get the length of the original work
 34 | 		self.len_orig = 0
 35 | 
 36 | 		# Get the length of the translation
 37 | 		self.len_trans = 0
 38 | 
 39 | 		# Threshold settings for association
 40 | 		self.r = 5 # range to search in lines before/after
 41 | 		self.si_thresh = 0.0 # must have a higher similarity index than this to save
 42 | 
 43 | 		# Helps
 44 | 		self.punctuation_transtable = {ord(c): " " for c in string.punctuation}
 45 | 		self.stops = stopwords.words("english")
 46 | 		self.lmtzr = WordNetLemmatizer()
 47 | 
 48 | 		# Load translation and map
 49 | 		self.load_trans()
 50 | 		self.map_trans()
 51 | 
 52 | 		return
 53 | 
 54 | 
 55 | 
 56 | 	def load_trans(self):
 57 | 
 58 | 		self.translation = []
 59 | 
 60 | 		with open( self.trans_fname, "r" ) as f:
 61 | 
 62 | 			trans = f.readlines()
 63 | 
 64 | 			for i, line in enumerate( trans ):
 65 | 				if len( line ):
 66 | 					self.translation.append( line.strip() )
 67 | 
 68 | 			self.len_trans = len( self.translation )
 69 | 
 70 | 		return
 71 | 
 72 | 
 73 | 	def map_trans(self):
 74 | 
 75 | 		# calculate the ratio of the length of the original to the translation
 76 | 		self.ratio = self.len_orig / self.len_trans
 77 | 
 78 | 		for i, text_unit in enumerate( self.translation ):
 79 | 
 80 | 			# nix 'd
 81 | 			text_unit_orig = text_unit
 82 | 			text_unit = text_unit.replace("'d", "")
 83 | 
 84 | 			# strip punctuation
 85 | 			text_unit = text_unit.translate(self.punctuation_transtable).lower()
 86 | 			text_unit = text_unit.replace("—", " ")
 87 | 
 88 | 			# split at words
 89 | 			words = text_unit.split(" ")
 90 | 
 91 | 			# lemmas
 92 | 			lemmas = []
 93 | 			for word in words:
 94 | 				if len(word):
 95 | 					word = self.lmtzr.lemmatize(word)
 96 | 
 97 | 					if word not in self.stops:
 98 | 						lemmas.append(word)
 99 | 
100 | 			# syns
101 | 			syns = []
102 | 			for lemma in lemmas:
103 | 				synsets = wn.synsets(lemma)
104 | 
105 | 				word_syns = []
106 | 				for syn in synsets:
107 | 					word_syns = word_syns + syn.lemma_names()
108 | 
109 | 				syns = syns + word_syns
110 | 
111 | 			syns = dedupe_list( syns )
112 | 			self._map_unit( i, syns, text_unit_orig )
113 | 
114 | 
115 | 
116 | 		return
117 | 
118 | 	def _map_unit( self, i, syns, text_unit_orig ):
119 | 
120 | 
121 | 		target_n = self.ratio * i
122 | 		l_n_min = np.floor( target_n - self.r )
123 | 		l_n_max = np.ceil( target_n + self.r )
124 | 
125 | 		# This is where we need to load lines from the original work
126 | 		lines = []
127 | 
128 | 		line_ms = []
129 | 		for line in lines:
130 | 
131 | 			line_senses = []
132 | 			line_defs_lemmas = []
133 | 			line_defs_syns = []
134 | 			m = 0
135 | 
136 | 			# Flatten the line definition senses
137 | 			for word in line['definitions']:
138 | 				for definition in word['defs']:
139 | 					line_senses = line_senses + definition['senses']
140 | 
141 | 			# Build list of lemmas from the word definitions
142 | 			for sense in line_senses:
143 | 				# nix 'd
144 | 				sense = sense.replace("'d", "")
145 | 
146 | 				# strip punctuation
147 | 				sense = sense.translate(self.punctuation_transtable).lower()
148 | 				sense = sense.replace("—", " ")
149 | 
150 | 				# split at words
151 | 				sense_words = sense.split(" ")
152 | 
153 | 				# lemmatize and check stoplist
154 | 				for word in sense_words:
155 | 					if len(word):
156 | 						word = self.lmtzr.lemmatize(word)
157 | 						if word not in self.stops:
158 | 							line_defs_lemmas.append(word)
159 | 			# syns
160 | 			line_defs_lemmas = dedupe_list( line_defs_lemmas)
161 | 			for lemma in line_defs_lemmas:
162 | 				synsets = wn.synsets(lemma)
163 | 				word_syns = []
164 | 				for syn in synsets:
165 | 					word_syns = word_syns + syn.lemma_names()
166 | 				line_defs_syns = line_defs_syns + word_syns
167 | 			line_defs_syns = dedupe_list( line_defs_syns )
168 | 
169 | 			# Compare the line definiton senses to our syn list
170 | 			for lem_syn in syns:
171 | 				for lem_def in line_defs_syns:
172 | 					if lem_syn == lem_def:
173 | 						m += 1
174 | 
175 | 			# Adjust m for the total number of syns compared
176 | 			m_rel = m / ( len( syns ) + len( line_defs_syns ) )
177 | 
178 | 			# Finally, add the comparison matching to the
179 | 			line_ms.append( [ m, m_rel ] )
180 | 
181 | 
182 | 		# Figure the min/max, rel * 100
183 | 		m_max = 0
184 | 		m_min = 100
185 | 		for m_ls in line_ms:
186 | 			m_ls[1] = m_ls[1] * 100
187 | 			m_rel = m_ls[1]
188 | 
189 | 			if m_rel > m_max:
190 | 				m_max = m_ls[1]
191 | 			if m_rel < m_min:
192 | 				m_min = m_ls[1]
193 | 
194 | 		# Scale m_rel and if above significance thresh, add to line nos
195 | 		trans_l_ns = []
196 | 		for m_i, m_ls in enumerate( line_ms ):
197 | 			# rel is scaled to min/max (20%)
198 | 			if ( m_max - m_min ) > 0:
199 | 				m_ls[1] = ( ( m_ls[1] - m_min ) / ( m_max - m_min ) ) * 0.20
200 | 			# Final adjust for bigger rels (80%)
201 | 			m_ls[1] = m_ls[1] + ( ( m_ls[0] / 100 ) * 0.80 )
202 | 
203 | 			if m_ls[1] >= self.si_thresh:
204 | 				line_n = int( l_n_min + m_i )
205 | 
206 | 				if( l_n_min < 0 ):
207 | 					line_n = int( l_n_min + m_i ) + self.r
208 | 				elif ( l_n_min > self.len_orig ):
209 | 					line_n = int( l_n_min + m_i )
210 | 
211 | 				# Append for base-1 counting
212 | 				trans_l_ns.append( line_n + 1 )
213 | 
214 | 		return trans_l_ns
215 | 


--------------------------------------------------------------------------------
/metadata/vector/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/metadata/vector/__init__.py


--------------------------------------------------------------------------------
/perseus_parsing_notes.txt:
--------------------------------------------------------------------------------
  1 | # Notes
  2 | Some units are called 'card' (for sections in Seneca and line numbers in Vergil. Some rewriting will be required.
  3 | 
  4 | # Checked Authors
  5 | Metadata has been parsed from the following files (at least one text for each author)
  6 | 
  7 | ## Latin
  8 | Ammianus
  9 |     http://localhost:5000/lang/latin/corpus/perseus/author/Ammianus/text/amm
 10 | 
 11 | Apuleius
 12 |     http://localhost:5000/lang/latin/corpus/perseus/author/apuleius/text/apuleius.ap
 13 | 
 14 | Augustine
 15 |     http://localhost:5000/lang/latin/corpus/perseus/author/Augustine/text/august.sellet
 16 | 
 17 | Bede
 18 |     http://localhost:5000/lang/latin/corpus/perseus/author/bede/text/bede.hega
 19 | 
 20 | Caesar
 21 |     http://localhost:5000/lang/latin/corpus/perseus/author/caesar/text/caes.bg
 22 | 
 23 | Catullus
 24 |     http://localhost:5000/lang/latin/corpus/perseus/author/Catullus/text/cat
 25 | 
 26 | Celsus
 27 |     http://localhost:5000/lang/latin/corpus/perseus/author/celsus/text/cels_darem
 28 | 
 29 | Cicero
 30 |     http://localhost:5000/lang/latin/corpus/perseus/author/cicero/text/cic.ac
 31 |     http://localhost:5000/lang/latin/corpus/perseus/author/cicero/text/cic.fam
 32 | 
 33 | Columella
 34 |     http://localhost:5000/lang/latin/corpus/perseus/author/columella/text/col.agr01
 35 | 
 36 | Curtius
 37 |     http://localhost:5000/lang/latin/corpus/perseus/author/curtius/text/curtius.alex
 38 | 
 39 | Flaccus
 40 |     http://localhost:5000/lang/latin/corpus/perseus/author/flaccus/text/v.fl
 41 | 
 42 | Florus
 43 |     http://localhost:5000/lang/latin/corpus/perseus/author/florus/text/florus.epit
 44 | 
 45 | Gellius
 46 |     http://localhost:5000/lang/latin/corpus/perseus/author/Gellius/text/gel
 47 | 
 48 | Glass
 49 |     http://localhost:5000/lang/latin/corpus/perseus/author/glass/text/washington.bio
 50 | 
 51 | HistAugust
 52 |     http://localhost:5000/lang/latin/corpus/perseus/author/HistAugust/text/sha01.1
 53 | 
 54 | Horace
 55 |     http://localhost:5000/lang/latin/corpus/perseus/author/horace/text/hor.ap
 56 | 
 57 | Jerome
 58 |     http://localhost:5000/lang/latin/corpus/perseus/author/jerome/text/jerome.sellet
 59 | 
 60 | Juvenal
 61 |     http://localhost:5000/lang/latin/corpus/perseus/author/juvenal/text/juv
 62 | 
 63 | Livy
 64 |     http://localhost:5000/lang/latin/corpus/perseus/author/livy/text/livy.foster01-02
 65 | 
 66 | Lucan
 67 |     http://localhost:5000/lang/latin/corpus/perseus/author/lucan/text/pharsalia
 68 | 
 69 | Martial
 70 |     http://localhost:5000/lang/latin/corpus/perseus/author/martial/text/martial
 71 | 
 72 | MinuciusFelix
 73 |     http://localhost:5000/lang/latin/corpus/perseus/author/MinuciusFelix/text/minfel.octav
 74 | 
 75 | Nepos
 76 |     http://localhost:5000/lang/latin/corpus/perseus/author/nepos/text/lives
 77 | 
 78 | Ovid
 79 |     http://localhost:5000/lang/latin/corpus/perseus/author/ovid/text/ovid.am
 80 |     http://localhost:5000/lang/latin/corpus/perseus/author/ovid/text/ovid.met?section_1=1
 81 | 
 82 | Persius
 83 |     http://localhost:5000/lang/latin/corpus/perseus/author/persius/text/persius.sat
 84 | 
 85 | Petronius
 86 |     http://localhost:5000/lang/latin/corpus/perseus/author/petronius/text/petr
 87 | 
 88 | Plautus
 89 |     http://localhost:5000/lang/latin/corpus/perseus/author/Plautus/text/pl.aul
 90 | 
 91 | Pliny
 92 |     http://localhost:5000/lang/latin/corpus/perseus/author/pliny/text/pliny.min.letters
 93 | 
 94 | Propertius
 95 |     http://localhost:5000/lang/latin/corpus/perseus/author/propertius/text/prop
 96 | 
 97 | Prudentius
 98 |     http://localhost:5000/lang/latin/corpus/perseus/author/prudentius/text/prud.01praef
 99 | 
100 | Quintilian
101 |     http://localhost:5000/lang/latin/corpus/perseus/author/quintilian/text/quint.butler1-3
102 | 
103 | Quintus
104 |     http://localhost:5000/lang/latin/corpus/perseus/author/quintus/text/quintsmyrn_01
105 | 
106 | Sallust
107 |     http://localhost:5000/lang/latin/corpus/perseus/author/sallust/text/sallust.catil
108 | 
109 | Seneca
110 |     http://localhost:5000/lang/latin/corpus/perseus/author/seneca/text/sen.ag
111 | 
112 | Seneca1
113 |     http://localhost:5000/lang/latin/corpus/perseus/author/seneca1/text/seneca.contr
114 | 
115 | SiliusItalicus
116 |     http://localhost:5000/lang/latin/corpus/perseus/author/SiliusItalicus/text/silius.punica
117 | 
118 | Statius
119 |     http://localhost:5000/lang/latin/corpus/perseus/author/statius/text/stat.achill
120 | 
121 | Suetonius
122 |     http://localhost:5000/lang/latin/corpus/perseus/author/suetonius/text/suet.caes
123 | 
124 | Sulpicia
125 |     Note: Sulpicia has no Latin text file, so I remove it from /authors, however routing still available at
126 |     http://localhost:5000/lang/latin/corpus/perseus/author/Sulpicia/texts
127 | 
128 | Tacitus
129 |     http://localhost:5000/lang/latin/corpus/perseus/author/Tacitus/text/tac.ann
130 | 
131 | Terence
132 |     http://localhost:5000/lang/latin/corpus/perseus/author/terence/text/ad
133 | 
134 | Tertullian
135 |     http://localhost:5000/lang/latin/corpus/perseus/author/tertullian/text/tert.apol
136 | 
137 | Tibullus
138 |     http://localhost:5000/lang/latin/corpus/perseus/author/tibullus/text/tibullus.el
139 | 
140 | Vergil
141 |     http://localhost:5000/lang/latin/corpus/perseus/author/Vergil/text/verg.a
142 | 
143 | Vitruvius
144 |     http://localhost:5000/lang/latin/corpus/perseus/author/vitruvius/text/vitruv
145 | 
146 | ## Greek
147 | 
148 | Aeschines
149 |     http://localhost:5000/lang/greek/corpus/perseus/author/Aeschines/text/aeschin
150 | 
151 | Aeschylus
152 |     http://localhost:5000/lang/greek/corpus/perseus/author/Aeschylus/text/aesch.ag
153 | 
154 | Andocides
155 |     http://localhost:5000/lang/greek/corpus/perseus/author/Andocides/text/andoc
156 | 
157 | Anth
158 |     http://localhost:5000/lang/greek/corpus/perseus/author/anth/text/01
159 | 
160 | Apollodorus
161 |     http://localhost:5000/lang/greek/corpus/perseus/author/Apollodorus/text/apollod
162 | 
163 | Apollonius
164 |     http://localhost:5000/lang/greek/corpus/perseus/author/apollonius/text/argo
165 | 
166 | Appian
167 |     http://localhost:5000/lang/greek/corpus/perseus/author/appian/text/appian.cw
168 | 
169 | Aratus
170 |     http://localhost:5000/lang/greek/corpus/perseus/author/aratus/text/aratus_01
171 | 
172 | Aretaeus
173 |     http://localhost:5000/lang/greek/corpus/perseus/author/Aretaeus/text/aret
174 | 
175 | Aristides
176 |     http://localhost:5000/lang/greek/corpus/perseus/author/Aristides/text/aristid.orat
177 | 
178 | Aristophanes
179 |     http://localhost:5000/lang/greek/corpus/perseus/author/Aristophanes/text/aristoph.ach
180 | 
181 | Aristotle
182 |     http://localhost:5000/lang/greek/corpus/perseus/author/Aristotle/text/aristot.ath.pol
183 | 
184 | Arrian
185 |     http://localhost:5000/lang/greek/corpus/perseus/author/Arrian/text/arrian.acies
186 |     http://localhost:5000/lang/greek/corpus/perseus/author/Arrian/text/arrian.indica
187 | 
188 | Athenaeus
189 |     http://localhost:5000/lang/greek/corpus/perseus/author/Athenaeus/text/ath01
190 | 
191 | Bacchylides
192 |     http://localhost:5000/lang/greek/corpus/perseus/author/Bacchylides/text/bacchyl
193 | 
194 | Bible
195 |     http://localhost:5000/lang/greek/corpus/perseus/author/Bible/text/nt
196 | 
197 | Callimachus
198 |     http://localhost:5000/lang/greek/corpus/perseus/author/Callimachus/text/callimachus
199 |     http://localhost:5000/lang/greek/corpus/perseus/author/Callimachus/text/call_02
200 | 
201 | 
202 | Colluthus
203 |     http://localhost:5000/lang/greek/corpus/perseus/author/Colluthus/text/colluthus.01
204 | 
205 | Demades
206 |     http://localhost:5000/lang/greek/corpus/perseus/author/Demades/text/demad
207 | 
208 | Demosthenes
209 |     http://localhost:5000/lang/greek/corpus/perseus/author/Demosthenes/text/dem01-10
210 | 
211 | Dinarchus
212 |     http://localhost:5000/lang/greek/corpus/perseus/author/Dinarchus/text/din
213 | 
214 | DioChrys
215 |     http://localhost:5000/lang/greek/corpus/perseus/author/DioChrys/text/diochr01
216 | 
217 | Diodorus
218 |     http://localhost:5000/lang/greek/corpus/perseus/author/Diodorus/text/diod.hist01-05
219 | 
220 | Diogenes
221 |     http://localhost:5000/lang/greek/corpus/perseus/author/Diogenes/text/dl
222 | 
223 | Dionysius
224 |     http://localhost:5000/lang/greek/corpus/perseus/author/Dionysius/text/dh.002
225 |     http://localhost:5000/lang/greek/corpus/perseus/author/Dionysius/text/dh.hist01
226 | 
227 | Dobson
228 |     ! broken: What is this? Remove from /authors ?
229 | 
230 | Elegy
231 |     http://localhost:5000/lang/greek/corpus/perseus/author/elegy/text/1
232 | 
233 | Epictetus
234 |     http://localhost:5000/lang/greek/corpus/perseus/author/epictetus/text/epictetus
235 | 
236 | Euclid
237 |     http://localhost:5000/lang/greek/corpus/perseus/author/Euclid/text/euc.elem
238 | 
239 | Euripides
240 |     http://localhost:5000/lang/greek/corpus/perseus/author/Euripides/text/eur.orest
241 | 
242 | Galen
243 |     http://localhost:5000/lang/greek/corpus/perseus/author/Galen/text/gal.nat.fac
244 | 
245 | Herodotus
246 |     http://localhost:5000/lang/greek/corpus/perseus/author/Herodotus/text/hdt
247 | 
248 | Hesiod
249 |     http://localhost:5000/lang/greek/corpus/perseus/author/Hesiod/text/hes.sh
250 |     http://localhost:5000/lang/greek/corpus/perseus/author/Hesiod/text/hes.th
251 |     http://localhost:5000/lang/greek/corpus/perseus/author/Hesiod/text/hes.wd
252 |     http://localhost:5000/lang/greek/corpus/perseus/author/Hesiod/text/hes.wd?section_1=1
253 | 
254 | Hippocrates
255 |     http://localhost:5000/lang/greek/corpus/perseus/author/Hippocrates/text/hp.jones
256 | 
257 | Homer
258 |     http://localhost:5000/lang/greek/corpus/perseus/author/Homer/text/hom.il
259 | 
260 | Homeric_Hymns
261 |     http://localhost:5000/lang/greek/corpus/perseus/author/Homeric_Hymns/text/hh
262 | 
263 | Hyperides
264 |     http://localhost:5000/lang/greek/corpus/perseus/author/Hyperides/text/hyp
265 | 
266 | Isocrates
267 |     No Greek text, so rm from /authors
268 | 
269 | JebbOrators
270 |     http://localhost:5000/lang/greek/corpus/perseus/author/JebbOrators/text/attic_orators
271 | 
272 | Josephus
273 |     http://localhost:5000/lang/greek/corpus/perseus/author/Josephus/text/j.bj
274 | 
275 | Lucian
276 |     http://localhost:5000/lang/greek/corpus/perseus/author/Lucian/text/01
277 | 
278 | Lucretius
279 |     !! Why they hell is Lucretius under Greek???
280 |     !! This is something we'll have to fix while indexing; I'll leave him under Greek for now
281 |     http://localhost:5000/lang/greek/corpus/perseus/author/Lucretius/text/lucretius
282 | 
283 | Lycophron
284 |     http://localhost:5000/lang/greek/corpus/perseus/author/Lycophron/text/lycophron_01
285 | 
286 | Lycurgus
287 |     http://localhost:5000/lang/greek/corpus/perseus/author/Lycurgus/text/lyc
288 | 
289 | Lysias
290 |     http://localhost:5000/lang/greek/corpus/perseus/author/Lysias/text/lys
291 | 
292 | Nonnos
293 |     http://localhost:5000/lang/greek/corpus/perseus/author/Nonnos/text/nonnos_01
294 | 
295 | Oppian
296 |     http://localhost:5000/lang/greek/corpus/perseus/author/Oppian/texts
297 | 
298 | Pausanias
299 |     http://localhost:5000/lang/greek/corpus/perseus/author/Pausanias/text/paus
300 | 
301 | Phaedrus
302 |     http://localhost:5000/lang/greek/corpus/perseus/author/Phaedrus/texts
303 | 
304 | Pindar
305 |     http://localhost:5000/lang/greek/corpus/perseus/author/Pindar/text/pind
306 | 
307 | Plato
308 |     http://localhost:5000/lang/greek/corpus/perseus/author/Plato/text/plat.l
309 | 
310 | Plutarch
311 |     http://localhost:5000/lang/greek/corpus/perseus/author/Plutarch/text/plut.0094.002_teubner
312 |     http://localhost:5000/lang/greek/corpus/perseus/author/Plutarch/text/plut.082b_loeb
313 | 
314 | Polybius
315 |     http://localhost:5000/lang/greek/corpus/perseus/author/Polybius/text/hist
316 | 
317 | Sidonius
318 |     http://localhost:5000/lang/greek/corpus/perseus/author/Sidonius/texts
319 | 
320 | Sophocles
321 |     http://localhost:5000/lang/greek/corpus/perseus/author/Sophocles/text/soph.aj
322 | 
323 | Strabo
324 |     http://localhost:5000/lang/greek/corpus/perseus/author/Strabo/text/strab
325 | 
326 | Theocritus
327 |     http://localhost:5000/lang/greek/corpus/perseus/author/Theocritus/text/idylls
328 | 
329 | Theophrastus
330 |     http://localhost:5000/lang/greek/corpus/perseus/author/Theophrastus/text/char
331 | 
332 | Thucydides
333 |     ! There are no Greek files for Thucydides: http://localhost:5000/lang/greek/corpus/perseus/author/Thucydides/texts
334 |     /Users/kyle/cltk_data/greek/text/greek_text_perseus/Thucydides/opensource/
335 | 
336 | Tryphiodorus
337 |     http://localhost:5000/lang/greek/corpus/perseus/author/Tryphiodorus/text/tryphiodorus_01
338 | 
339 | Xenophon
340 |     http://localhost:5000/lang/greek/corpus/perseus/author/Xenophon/text/xen.anab
341 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | amqp==1.4.7
 2 | aniso8601==1.1.0
 3 | anyjson==0.3.3
 4 | beautifulsoup4==4.4.1
 5 | billiard==3.3.0.21
 6 | celery==3.1.19
 7 | cltk==0.1.29
 8 | Flask==0.10.1
 9 | Flask-PyMongo==0.4.0
10 | Flask-RESTful==0.3.4
11 | gitdb==0.6.4
12 | GitPython==1.0.1
13 | itsdangerous==0.24
14 | Jinja2==2.8
15 | kombu==3.0.29
16 | MarkupSafe==0.23
17 | nltk==3.1
18 | pymongo==3.1.1
19 | python-dateutil==2.4.2
20 | python-slugify==1.1.4
21 | pytz==2015.7
22 | regex==2015.11.14
23 | six==1.10.0
24 | smmap==0.9.0
25 | Unidecode==0.4.18
26 | Werkzeug==0.11.2
27 | wheel==0.24.0
28 | wikipedia
29 | 


--------------------------------------------------------------------------------
/tests.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import unittest
  3 | import api_json
  4 | import json
  5 | from cltk.corpus.utils.importer import CorpusImporter
  6 | from metadata.pos.constants import POS_METHODS
  7 | 
  8 | class TestAPIMethods(unittest.TestCase):
  9 |     """Requires latin_text_perseus folder in ~/cltk_data/latin/text/latin_text_perseus"""
 10 | 
 11 |     def setUp(self):
 12 |         file_rel_text = os.path.join('~/cltk_data/latin/text/latin_text_perseus/README.md')
 13 |         file_text = os.path.expanduser(file_rel_text)
 14 |         if not os.path.isfile(file_text):
 15 |             corpus_importer = CorpusImporter('latin')
 16 |             corpus_importer.import_corpus('latin_text_perseus')
 17 |             corpus_importer.import_corpus('latin_models_cltk')
 18 |             file_exists = os.path.isfile(file_text)
 19 |             self.assertTrue(file_exists)
 20 |             
 21 |         file_rel_treebank = os.path.join('~/cltk_data/latin/treebank/latin_treebank_perseus/README.md')
 22 |         file_treebank = os.path.expanduser(file_rel_treebank)
 23 |         if not os.path.isfile(file_rel_treebank):
 24 |             corpus_importer = CorpusImporter('latin')
 25 |             corpus_importer.import_corpus('latin_treebank_perseus')
 26 |             file_exists = os.path.isfile(file_treebank)
 27 |             self.assertTrue(file_exists)
 28 | 
 29 |         self.app = api_json.app.test_client()
 30 |         self.headers = [('Content-Type', 'application/json')]
 31 | 
 32 |     def test_home(self):
 33 |         response = self.app.get('/')
 34 |         self.assertEqual(response.status, '404 NOT FOUND')
 35 | 
 36 |     def test_hello_api(self):
 37 |         response = self.app.get('/hello')
 38 |         self.assertEqual(response.status, '200 OK')
 39 |         self.assertEqual(eval(response.data), dict(hello='world'))
 40 | 
 41 |     def test_todo_api(self):
 42 |         response = self.app.get('/todo/cltk_testing')
 43 |         self.assertEqual(response.status, '200 OK')
 44 |         self.assertEqual(eval(response.data), {'example with token': 'cltk_testing'})
 45 | 
 46 |     def test_lang_api(self):
 47 |         response = self.app.get('/lang')
 48 |         self.assertEqual(response.status, '200 OK')
 49 |         response_lang = eval(response.data)['languages']
 50 |         self.assertTrue('latin' in response_lang)
 51 | 
 52 |     def test_corpus_api(self):
 53 |         response = self.app.get('/lang/latin/corpus')
 54 |         self.assertEqual(response.status, '200 OK')
 55 |         self.assertEqual(eval(response.data)['language'], 'latin')
 56 |         self.assertTrue('perseus' in eval(response.data)['corpora'])
 57 | 
 58 |     def test_author_api(self):
 59 |         response = self.app.get('/lang/latin/corpus/perseus/author')
 60 |         self.assertEqual(response.status, '200 OK')
 61 |         self.assertEqual(eval(response.data)['language'], 'latin')
 62 |         self.assertTrue('glass' in eval(response.data)['authors'])
 63 | 
 64 |     def test_texts_api(self):
 65 |         response = self.app.get('/lang/latin/corpus/perseus/author/glass/text')
 66 |         self.assertEqual(response.status, '200 OK')
 67 |         self.assertEqual(eval(response.data)['language'], 'latin')
 68 |         self.assertEqual(eval(response.data)['corpus'], 'perseus')
 69 |         self.assertEqual(eval(response.data)['author'], 'glass')
 70 |         self.assertTrue('washingtonii_vita' in eval(response.data)['texts'])
 71 | 
 72 |     def test_text_api(self):
 73 |         response = self.app.get('/lang/latin/corpus/perseus/author/tacitus/text/germania')
 74 |         self.assertEqual(response.status, '200 OK')
 75 |         self.assertEqual(eval(response.data)['language'], 'latin')
 76 |         self.assertEqual(eval(response.data)['corpus'], 'perseus')
 77 |         self.assertEqual(eval(response.data)['author'], 'tacitus')
 78 |         self.assertEqual(eval(response.data)['meta'], 'book-chapter')
 79 |         self.assertEqual(eval(response.data)['work'], 'germania')
 80 |         self.assertEqual(eval(response.data)['text']['2']['1'].strip(), 'Ipsos Germanos indigenas crediderim minimeque aliarum gentium adventibus et hospitiis mixtos, quia nec terra olim sed classibus advehebantur qui mutare sedes quaerebant, et immensus ultra utque sic dixerim adversus Oceanus raris ab orbe nostro navibus aditur.')
 81 | 
 82 |         response_chunk1 = self.app.get('/lang/latin/corpus/perseus/author/tacitus/text/germania?chunk1=2')
 83 |         self.assertEqual(response_chunk1.status, '200 OK')
 84 |         self.assertEqual(eval(response_chunk1.data)['text']['2'].strip(), 'quis porro, praeter periculum horridi et ignoti maris, Asia aut Africa aut Italia relicta Germaniam peteret, informem terris, asperam caelo, tristem cultu aspectuque nisi si patria sit?')
 85 | 
 86 |         response_chunk2 = self.app.get('/lang/latin/corpus/perseus/author/tacitus/text/germania?chunk1=2&chunk2=4')
 87 |         self.assertEqual(response_chunk2.status, '200 OK')
 88 |         self.assertEqual(eval(response_chunk2.data)['text'].strip(), 'quidam, ut in licentia vetustatis, plures deo ortos pluresque gentis appellationes, Marsos Gambrivios Suebos Vandilios adfirmant, eaque vera et antiqua nomina.')
 89 | 
 90 |         response_chunk3 = self.app.get('/lang/latin/corpus/perseus/author/tacitus/text/germania?chunk1=2&chunk2=4&chunk3=1')
 91 |         self.assertEqual(response_chunk3.status, '500 INTERNAL SERVER ERROR')
 92 | 
 93 |     def test_pos_latin_ngram123(self):
 94 |         # test GET response
 95 |         response = self.app.get('/core/pos')
 96 |         expected_response = {'methods': POS_METHODS}
 97 |         self.assertEqual(eval(response.data), expected_response)
 98 | 
 99 |         # test POST response
100 |         data = json.dumps({'string': 'Gallia est omnis divisa in partes tres',
101 |                            'lang': 'latin',
102 |                            'method': 'ngram123'})
103 |         response = self.app.post('/core/pos', data=data, headers=self.headers)
104 |         expected_response = {u'tags': [{'word': 'Gallia', 'tag': 'None'},
105 |                                        {'word': 'est', 'tag': 'V3SPIA---'},
106 |                                        {'word': 'omnis', 'tag': 'A-S---MN-'},
107 |                                        {'word': 'divisa', 'tag': 'T-PRPPNN-'},
108 |                                        {'word': 'in', 'tag': 'R--------'},
109 |                                        {'word': 'partes', 'tag': 'N-P---FA-'},
110 |                                        {'word': 'tres', 'tag': 'M--------'}]}
111 |         self.assertEqual(response.status, '200 OK')
112 |         self.assertEqual(eval(response.data), expected_response)
113 | 
114 |     def test_core_stem(self):
115 |         response = self.app.get('/core/stem/Est interdum praestare mercaturis rem quaerere, nisi tam periculosum sit, et item foenerari, si tam honestum. Maiores nostri sic habuerunt et ita in legibus posiuerunt: furem dupli condemnari, foeneratorem quadrupli. Quanto peiorem ciuem existimarint foeneratorem quam furem, hinc licet existimare. Et uirum bonum quom laudabant, ita laudabant: bonum agricolam bonumque colonum; amplissime laudari existimabatur qui ita laudabatur. Mercatorem autem strenuum studiosumque rei quaerendae existimo, uerum, ut supra dixi, periculosum et calamitosum. At ex agricolis et uiri fortissimi et milites strenuissimi gignuntur, maximeque pius quaestus stabilissimusque consequitur minimeque inuidiosus, minimeque male cogitantes sunt qui in eo studio occupati sunt. Nunc, ut ad rem redeam, quod promisi institutum principium hoc erit.')
116 |         self.assertEqual(response.status, "200 OK")
117 |         self.assertEqual(eval(response.data)['stemmed_output'], 'est interd praestar mercatur r quaerere, nisi tam periculos sit, et it foenerari, si tam honestum. maior nostr sic habueru et ita in leg posiuerunt: fur dupl condemnari, foenerator quadrupli. quant peior ciu existimari foenerator quam furem, hinc lice existimare. et uir bon quo laudabant, ita laudabant: bon agricol bon colonum; amplissim laudar existimaba qui ita laudabatur. mercator autem strenu studios re quaerend existimo, uerum, ut supr dixi, periculos et calamitosum. at ex agricol et uir fortissim et milit strenuissim gignuntur, maxim p quaest stabilissim consequi minim inuidiosus, minim mal cogitant su qui in e studi occupat sunt. nunc, ut ad r redeam, quod promis institut principi hoc erit. ')
118 | 
119 |     def test_definition_api(self):
120 |         response = self.app.get('lang/latin/define/abante')
121 |         self.assertEqual(response.status, '200 OK')
122 |         self.assertEqual(eval(response.data)[0]['headword'], 'Abas')
123 |         self.assertEqual(eval(response.data)[0]['definition'], 'The twelfth king of Argos, son of Lynceus and Hypermnestra')
124 |         self.assertEqual(eval(response.data)[0]['pos'], 'noun sg masc abl')
125 | 
126 |     def test_translation_api(self):
127 |         response = self.app.get('/lang/latin/corpus/perseus/author/catullus/text/poemata?translation=english')
128 |         self.assertEqual(response.status, '200 OK')
129 |         data = json.loads(response.get_data(as_text=True))
130 |         self.assertEqual(data['language'], 'latin')
131 |         self.assertEqual(data['corpus'], 'perseus')
132 |         self.assertEqual(data['author'], 'catullus')
133 |         self.assertEqual(data['meta'], 'poem-line')
134 |         self.assertEqual(data['work'], 'poemata')
135 |         self.assertEqual(data['translations'][0]['translator'], 'Sir R. F. Burton')
136 |         self.assertEqual(data['translations'][0]['text']['1']['1'], 'To thee (Cornelius!); for wast ever fain')
137 | 
138 | if __name__ == '__main__':
139 |     unittest.main()
140 | 


--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cltk/cltk_api/eb736ec9f20c008436e93cd0be4bcd05be7d455c/util/__init__.py


--------------------------------------------------------------------------------
/util/jsonp.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from functools import wraps
 3 | from flask import request, current_app
 4 | 
 5 | def jsonp(func):
 6 |     """Wraps JSONified output for JSONP requests."""
 7 | 
 8 |     @wraps(func)
 9 |     def decorated_function(*args, **kwargs):
10 | 
11 |         callback = request.args.get('callback', False)
12 |         if callback:
13 |             data = str(func(*args, **kwargs))
14 |             content = str(callback) + '(' + data + ')'
15 |             mimetype = 'application/javascript'
16 |             return current_app.response_class(content, mimetype=mimetype)
17 | 
18 |         else:
19 |             return func(*args, **kwargs)
20 | 
21 |     return decorated_function
22 | 


--------------------------------------------------------------------------------
/util/numerals.py:
--------------------------------------------------------------------------------
 1 | """Convert to and from Roman numerals"""
 2 | 
 3 | __author__ = "Mark Pilgrim (f8dy@diveintopython.org)"
 4 | __version__ = "1.4"
 5 | __date__ = "8 August 2001"
 6 | __copyright__ = """Copyright (c) 2001 Mark Pilgrim
 7 | 
 8 | This program is part of "Dive Into Python", a free Python tutorial for
 9 | experienced programmers.  Visit http://diveintopython.org/ for the
10 | latest version.
11 | 
12 | This program is free software; you can redistribute it and/or modify
13 | it under the terms of the Python 2.1.1 license, available at
14 | http://www.python.org/2.1.1/license.html
15 | """
16 | 
17 | import re
18 | 
19 | #Define exceptions
20 | class RomanError(Exception): pass
21 | class OutOfRangeError(RomanError): pass
22 | class NotIntegerError(RomanError): pass
23 | class InvalidRomanNumeralError(RomanError): pass
24 | 
25 | #Define digit mapping
26 | romanNumeralMap = (('M',  1000),
27 |                    ('CM', 900),
28 |                    ('D',  500),
29 |                    ('CD', 400),
30 |                    ('C',  100),
31 |                    ('XC', 90),
32 |                    ('L',  50),
33 |                    ('XL', 40),
34 |                    ('X',  10),
35 |                    ('IX', 9),
36 |                    ('V',  5),
37 |                    ('IV', 4),
38 |                    ('I',  1))
39 | 
40 | def toRoman(n):
41 |     """convert integer to Roman numeral"""
42 |     if not (0 < n < 5000):
43 |         raise OutOfRangeError("number out of range (must be 1..4999)")
44 |     if int(n) != n:
45 |         raise NotIntegerError("decimals can not be converted")
46 | 
47 |     result = ""
48 |     for numeral, integer in romanNumeralMap:
49 |         while n >= integer:
50 |             result += numeral
51 |             n -= integer
52 |     return result
53 | 
54 | #Define pattern to detect valid Roman numerals
55 | romanNumeralPattern = re.compile("""
56 |     ^                   # beginning of string
57 |     M{0,4}              # thousands - 0 to 4 M's
58 |     (CM|CD|D?C{0,3})    # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 C's),
59 |                         #            or 500-800 (D, followed by 0 to 3 C's)
60 |     (XC|XL|L?X{0,3})    # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 X's),
61 |                         #        or 50-80 (L, followed by 0 to 3 X's)
62 |     (IX|IV|V?I{0,3})    # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 I's),
63 |                         #        or 5-8 (V, followed by 0 to 3 I's)
64 |     $                   # end of string
65 |     """ ,re.VERBOSE)
66 | 
67 | def fromRoman(s):
68 |     """convert Roman numeral to integer"""
69 |     if not s:
70 |         raise InvalidRomanNumeralError('Input can not be blank')
71 |     if not romanNumeralPattern.search(s):
72 |         raise InvalidRomanNumeralError('Invalid Roman numeral: %s' % s)
73 | 
74 |     result = 0
75 |     index = 0
76 |     for numeral, integer in romanNumeralMap:
77 |         while s[index:index+len(numeral)] == numeral:
78 |             result += integer
79 |             index += len(numeral)
80 |     return result


--------------------------------------------------------------------------------
/util/text.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Sundry utility functions for sanitizing textual data
 3 | """
 4 | import re
 5 | import string
 6 | import unicodedata as ud
 7 | 
 8 | class TextUtil:
 9 | 
10 | 	def is_latin(self, uchr):
11 | 		try: return self.latin_letters[uchr]
12 | 		except KeyError:
13 | 			return self.latin_letters.setdefault(uchr, 'LATIN' in ud.name(uchr))
14 | 
15 | 	def only_roman_chars(self, unistr):
16 | 		return all(self.is_latin(uchr)
17 | 			for uchr in unistr
18 | 			if uchr.isalpha())
19 | 
20 | 	def only_iso88591(self, string):
21 | 		flag = True
22 | 		try:
23 | 			string.encode("iso-8859-1")
24 | 		except UnicodeEncodeError:
25 | 			flag = False
26 | 
27 | 		return flag
28 | 
29 | 	def strip_punctution( s ):
30 | 
31 | 	    exclude = set(string.punctuation)
32 | 
33 | 	    return ''.join(ch for ch in s if ch not in exclude)
34 | 


--------------------------------------------------------------------------------