├── .gitignore ├── README.md ├── docs ├── Makefile ├── _build │ ├── doctrees │ │ ├── app.doctree │ │ ├── config.doctree │ │ ├── environment.pickle │ │ ├── index.doctree │ │ ├── modules.doctree │ │ ├── pipeline.doctree │ │ ├── src.config.doctree │ │ ├── src.doctree │ │ ├── src.tasks.doctree │ │ └── tasks.doctree │ └── html │ │ ├── .buildinfo │ │ ├── _modules │ │ ├── app.html │ │ ├── index.html │ │ ├── pipeline.html │ │ ├── src │ │ │ ├── app.html │ │ │ ├── pipeline.html │ │ │ └── tasks │ │ │ │ ├── example.html │ │ │ │ └── example2.html │ │ └── tasks │ │ │ ├── example.html │ │ │ └── example2.html │ │ ├── _sources │ │ ├── app.txt │ │ ├── config.txt │ │ ├── index.txt │ │ ├── modules.txt │ │ ├── pipeline.txt │ │ ├── src.config.txt │ │ ├── src.tasks.txt │ │ ├── src.txt │ │ └── tasks.txt │ │ ├── _static │ │ ├── ajax-loader.gif │ │ ├── basic.css │ │ ├── bootstrap-2.3.2 │ │ │ ├── css │ │ │ │ ├── bootstrap-responsive.css │ │ │ │ ├── bootstrap-responsive.min.css │ │ │ │ ├── bootstrap.css │ │ │ │ └── bootstrap.min.css │ │ │ ├── img │ │ │ │ ├── glyphicons-halflings-white.png │ │ │ │ └── glyphicons-halflings.png │ │ │ └── js │ │ │ │ ├── bootstrap.js │ │ │ │ └── bootstrap.min.js │ │ ├── bootstrap-3.1.0 │ │ │ ├── css │ │ │ │ ├── bootstrap-theme.css │ │ │ │ ├── bootstrap-theme.css.map │ │ │ │ ├── bootstrap-theme.min.css │ │ │ │ ├── bootstrap.css │ │ │ │ ├── bootstrap.css.map │ │ │ │ └── bootstrap.min.css │ │ │ ├── fonts │ │ │ │ ├── glyphicons-halflings-regular.eot │ │ │ │ ├── glyphicons-halflings-regular.svg │ │ │ │ ├── glyphicons-halflings-regular.ttf │ │ │ │ └── glyphicons-halflings-regular.woff │ │ │ └── js │ │ │ │ ├── bootstrap.js │ │ │ │ └── bootstrap.min.js │ │ ├── bootstrap-sphinx.css │ │ ├── bootstrap-sphinx.js │ │ ├── bootswatch-2.3.2 │ │ │ ├── amelia │ │ │ │ └── bootstrap.min.css │ │ │ ├── cerulean │ │ │ │ └── bootstrap.min.css │ │ │ ├── cosmo │ │ │ │ └── bootstrap.min.css │ │ │ ├── cyborg │ │ │ │ └── bootstrap.min.css │ │ │ ├── flatly │ │ │ │ └── bootstrap.min.css │ │ │ 
├── journal │ │ │ │ └── bootstrap.min.css │ │ │ ├── readable │ │ │ │ └── bootstrap.min.css │ │ │ ├── simplex │ │ │ │ └── bootstrap.min.css │ │ │ ├── slate │ │ │ │ └── bootstrap.min.css │ │ │ ├── spacelab │ │ │ │ └── bootstrap.min.css │ │ │ ├── spruce │ │ │ │ └── bootstrap.min.css │ │ │ ├── superhero │ │ │ │ └── bootstrap.min.css │ │ │ └── united │ │ │ │ └── bootstrap.min.css │ │ ├── bootswatch-3.1.0 │ │ │ ├── amelia │ │ │ │ └── bootstrap.min.css │ │ │ ├── cerulean │ │ │ │ └── bootstrap.min.css │ │ │ ├── cosmo │ │ │ │ └── bootstrap.min.css │ │ │ ├── cupid │ │ │ │ └── bootstrap.min.css │ │ │ ├── cyborg │ │ │ │ └── bootstrap.min.css │ │ │ ├── flatly │ │ │ │ └── bootstrap.min.css │ │ │ ├── journal │ │ │ │ └── bootstrap.min.css │ │ │ ├── lumen │ │ │ │ └── bootstrap.min.css │ │ │ ├── readable │ │ │ │ └── bootstrap.min.css │ │ │ ├── simplex │ │ │ │ └── bootstrap.min.css │ │ │ ├── slate │ │ │ │ └── bootstrap.min.css │ │ │ ├── spacelab │ │ │ │ └── bootstrap.min.css │ │ │ ├── superhero │ │ │ │ └── bootstrap.min.css │ │ │ ├── united │ │ │ │ └── bootstrap.min.css │ │ │ └── yeti │ │ │ │ └── bootstrap.min.css │ │ ├── comment-bright.png │ │ ├── comment-close.png │ │ ├── comment.png │ │ ├── default.css │ │ ├── doctools.js │ │ ├── down-pressed.png │ │ ├── down.png │ │ ├── file.png │ │ ├── jquery.js │ │ ├── js │ │ │ ├── jquery-1.11.0.min.js │ │ │ └── jquery-fix.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── sidebar.js │ │ ├── underscore.js │ │ ├── up-pressed.png │ │ ├── up.png │ │ └── websupport.js │ │ ├── app.html │ │ ├── config.html │ │ ├── genindex.html │ │ ├── index.html │ │ ├── modules.html │ │ ├── objects.inv │ │ ├── pipeline.html │ │ ├── py-modindex.html │ │ ├── search.html │ │ ├── searchindex.js │ │ ├── src.config.html │ │ ├── src.html │ │ ├── src.tasks.html │ │ └── tasks.html ├── app.rst ├── conf.py ├── config.rst ├── index.rst ├── make.bat ├── modules.rst ├── pipeline.rst ├── src.config.rst ├── src.rst ├── src.tasks.rst └── 
tasks.rst ├── etc ├── grimlock.conf └── grimlock_prod.conf ├── fabfile.py ├── requirements.txt ├── src ├── __init__.py ├── app.py ├── cn_search_py │ ├── __init__.py │ ├── collections.py │ ├── connect.py │ ├── data │ │ ├── __init__.py │ │ └── language_codes.py │ ├── exceptions.py │ └── models.py ├── cn_store_py │ ├── __init__.py │ ├── connect.py │ └── models.py ├── config │ ├── __init__.py │ └── settings.py ├── pipeline.py └── tasks │ ├── __init__.py │ ├── add_default_values.py │ ├── data │ ├── __init__.py │ └── word_tag_map.py │ ├── donation_classifier.py │ ├── extract_content.py │ ├── extract_place.py │ ├── format_address.py │ ├── geocode.py │ ├── identify_language.py │ ├── image_tagger.py │ ├── relevance_classifier.py │ ├── reverse_geocode.py │ ├── translate_content.py │ └── update_doc.py └── tests ├── test_donation_classifier.py ├── test_extract_place.py ├── test_format_address_task.py ├── test_geocode_task.py ├── test_identify_language.py ├── test_pipeline.py ├── test_relevance_classifier.py └── test_reverse_geocode.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | src/config/production_settings.py 3 | src/config/development_settings.py 4 | src/config/staging_settings.py 5 | src/config/test_settings.py 6 | requirements.txt.freeze 7 | venv 8 | .DS_Store 9 | deploy_config.py 10 | src/tasks/worldcitiespop.txt 11 | src/tasks/locs.db 12 | src/cn_search_py/utils.py 13 | dump.rdb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Grimlock 2 | ======== 3 | 4 | ["Me, Grimlock."](http://tfwiki.net/wiki/Grimlock_(G1)) 5 | 6 | A simple transformation/data processing pipeline for CrisisNET. Pulls jobs from a FIFO queue and runs each through a series of predefined tasks. These tasks add metadata to content retrieved by [Sucka](https://github.com/ushahidi/sucka). 
7 | 8 | New tasks can be added to the `tasks` package, and included in the pipeline in `app.set_pipeline_steps`. 9 | 10 | Very much a work in progress. Watch this space. 11 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/src.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/src.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/src" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/src" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 
112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 
163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/_build/doctrees/app.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/app.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/config.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/config.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/_build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/index.doctree -------------------------------------------------------------------------------- 
/docs/_build/doctrees/modules.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/modules.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/pipeline.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/pipeline.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/src.config.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/src.config.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/src.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/src.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/src.tasks.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/src.tasks.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/tasks.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/doctrees/tasks.doctree -------------------------------------------------------------------------------- 
/docs/_build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: d7e98ce4490cfbb1c312785b64541164 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/_build/html/_modules/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Overview: module code — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 114 | 115 |
116 |
117 |
118 | 119 |

All modules for which code is available

120 | 129 | 130 |
131 | 132 |
133 |
134 |
135 |
136 |

137 | Back to top 138 | 139 |

140 |

141 | © Copyright 2014, Author.
142 | Created using Sphinx 1.2.1.
143 |

144 |
145 |
146 | 147 | -------------------------------------------------------------------------------- /docs/_build/html/_modules/src/tasks/example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | src.tasks.example — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 115 | 116 |
117 |
118 |
119 | 120 |

Source code for src.tasks.example

121 | """ This is just an example. Inside the run method of a task you can do whatever 
122 | you want. The only requirements are that your run method accepts a single 
123 | argument - the structure to be transformed or augmented - and returns the 
124 | transformed/augmented structure so that downstream tasks can make further 
125 | modifications or update the document in the datastore. 
126 | 
127 | """
128 | 
[docs]def run(data): 129 | print "in example" 130 | data["hiphophoray"] = "ho, hey, ho" 131 | return data
132 |
133 | 134 |
135 | 136 |
137 |
138 |
139 |
140 |

141 | Back to top 142 | 143 |

144 |

145 | © Copyright 2014, Author.
146 | Created using Sphinx 1.2.1.
147 |

148 |
149 |
150 | 151 | -------------------------------------------------------------------------------- /docs/_build/html/_modules/src/tasks/example2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | src.tasks.example2 — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 115 | 116 |
117 |
118 |
119 | 120 |

Source code for src.tasks.example2

121 | 
[docs]def run(data): 122 | print 'example 2' 123 | print data
124 |
125 | 126 |
127 | 128 |
129 |
130 |
131 |
132 |

133 | Back to top 134 | 135 |

136 |

137 | © Copyright 2014, Author.
138 | Created using Sphinx 1.2.1.
139 |

140 |
141 |
142 | 143 | -------------------------------------------------------------------------------- /docs/_build/html/_modules/tasks/example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | tasks.example — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 115 | 116 |
117 |
118 |
119 | 120 |

Source code for tasks.example

121 | """ This is just an example. Inside the run method of a task you can do whatever 
122 | you want. The only requirements are that your run method accepts a single 
123 | argument - the structure to be transformed or augmented - and returns the 
124 | transformed/augmented structure so that downstream tasks can make further 
125 | modifications or update the document in the datastore. 
126 | 
127 | """
128 | 
[docs]def run(data): 129 | print "in example" 130 | data["hiphophoray"] = "ho, hey, ho" 131 | return data
132 |
133 | 134 |
135 | 136 |
137 |
138 |
139 |
140 |

141 | Back to top 142 | 143 |

144 |

145 | © Copyright 2014, Author.
146 | Created using Sphinx 1.2.1.
147 |

148 |
149 |
150 | 151 | -------------------------------------------------------------------------------- /docs/_build/html/_modules/tasks/example2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | tasks.example2 — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 115 | 116 |
117 |
118 |
119 | 120 |

Source code for tasks.example2

121 | 
[docs]def run(data): 122 | print 'example 2' 123 | print data
124 |
125 | 126 |
127 | 128 |
129 |
130 |
131 |
132 |

133 | Back to top 134 | 135 |

136 |

137 | © Copyright 2014, Author.
138 | Created using Sphinx 1.2.1.
139 |

140 |
141 |
142 | 143 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/app.txt: -------------------------------------------------------------------------------- 1 | app module 2 | ========== 3 | 4 | .. automodule:: app 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/config.txt: -------------------------------------------------------------------------------- 1 | config package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | config.settings module 8 | ---------------------- 9 | 10 | .. automodule:: config.settings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: config 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/index.txt: -------------------------------------------------------------------------------- 1 | .. src documentation master file, created by 2 | sphinx-quickstart on Sat Feb 15 20:43:34 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to src's documentation! 7 | =============================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 4 13 | 14 | app 15 | config 16 | pipeline 17 | tasks 18 | 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | 27 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/modules.txt: -------------------------------------------------------------------------------- 1 | Grimlock 2 | ======== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | src 8 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/pipeline.txt: -------------------------------------------------------------------------------- 1 | pipeline module 2 | =============== 3 | 4 | .. automodule:: pipeline 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/src.config.txt: -------------------------------------------------------------------------------- 1 | src.config package 2 | ================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | src.config.settings module 8 | -------------------------- 9 | 10 | .. automodule:: src.config.settings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: src.config 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/src.tasks.txt: -------------------------------------------------------------------------------- 1 | src.tasks package 2 | ================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | src.tasks.example module 8 | ------------------------ 9 | 10 | .. automodule:: src.tasks.example 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | src.tasks.example2 module 16 | ------------------------- 17 | 18 | .. automodule:: src.tasks.example2 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: src.tasks 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/src.txt: -------------------------------------------------------------------------------- 1 | src package 2 | =========== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | src.config 10 | src.tasks 11 | 12 | Submodules 13 | ---------- 14 | 15 | src.app module 16 | -------------- 17 | 18 | .. automodule:: src.app 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | src.pipeline module 24 | ------------------- 25 | 26 | .. automodule:: src.pipeline 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: src 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/tasks.txt: -------------------------------------------------------------------------------- 1 | tasks package 2 | ============= 3 | 4 | Submodules 5 | ---------- 6 | 7 | tasks.example module 8 | -------------------- 9 | 10 | .. automodule:: tasks.example 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | tasks.example2 module 16 | --------------------- 17 | 18 | .. automodule:: tasks.example2 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: tasks 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/_build/html/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/_build/html/_static/bootstrap-2.3.2/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/bootstrap-2.3.2/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /docs/_build/html/_static/bootstrap-2.3.2/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/bootstrap-2.3.2/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /docs/_build/html/_static/bootstrap-3.1.0/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/bootstrap-3.1.0/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/bootstrap-3.1.0/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/bootstrap-3.1.0/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/bootstrap-3.1.0/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/bootstrap-3.1.0/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/bootstrap-sphinx.css: -------------------------------------------------------------------------------- 1 | /* 2 | * bootstrap-sphinx.css 3 | * ~~~~~~~~~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- Twitter Bootstrap theme. 6 | */ 7 | 8 | .navbar-inverse .brand { 9 | color: #FFF; 10 | } 11 | 12 | .page-top { 13 | top: 0px; 14 | } 15 | 16 | 17 | 18 | body { 19 | 20 | padding-top: 40px; 21 | 22 | } 23 | .page-top { 24 | 25 | top: 40px; 26 | 27 | } 28 | 29 | 30 | .navbar-inner { 31 | padding-left: 12px !important; 32 | padding-right: 12px !important; 33 | } 34 | 35 | 36 | table { 37 | border: 0; 38 | } 39 | 40 | .highlighttable .code pre { 41 | font-size: 12px; 42 | } 43 | 44 | .highlighttable .linenos pre { 45 | word-break: normal; 46 | font-size: 12px; 47 | } 48 | 49 | div.highlight { 50 | background: none; 51 | } 52 | 53 | a.footnote-reference { 54 | vertical-align: super; 55 | font-size: 75%; 56 | } 57 | 58 | table.footnote td.label { 59 | font-size: 100%; 60 | display: block; 61 | line-height: normal; 62 | background: inherit; 63 | } 64 | 65 | table.footnote { 66 | width: auto; 67 | margin-bottom: 0px; 68 | } 69 | 70 | table.field-list { 71 | width: auto; 72 | } 73 | 74 | .footer { 75 | width: 100%; 76 | border-top: 1px solid #ccc; 77 | padding-top: 10px; 78 | } 79 | 80 | 
.bs-sidenav form, .bs-sidenav #sourcelink { 81 | padding: 5px 20px; 82 | } 83 | 84 | 85 | 86 | /* The code below is based on the bootstrap website sidebar */ 87 | 88 | .bs-sidenav.affix { 89 | position: static; 90 | } 91 | 92 | /* First level of nav */ 93 | .bs-sidenav { 94 | margin-top: 30px; 95 | margin-bottom: 30px; 96 | padding-top: 10px; 97 | padding-bottom: 10px; 98 | text-shadow: 0 1px 0 #fff; 99 | background-color: #f7f5fa; 100 | border-radius: 5px; 101 | } 102 | 103 | /* All levels of nav */ 104 | .bs-sidenav .nav > li > a { 105 | display: block; 106 | color: #716b7a; 107 | padding: 5px 20px; 108 | } 109 | .bs-sidenav .nav > li > a:hover, 110 | .bs-sidenav .nav > li > a:focus { 111 | text-decoration: none; 112 | background-color: #e5e3e9; 113 | border-right: 1px solid #dbd8e0; 114 | } 115 | .bs-sidenav .nav > .active > a, 116 | .bs-sidenav .nav > .active:hover > a, 117 | .bs-sidenav .nav > .active:focus > a { 118 | font-weight: bold; 119 | color: #563d7c; 120 | background-color: transparent; 121 | border-right: 1px solid #563d7c; 122 | } 123 | 124 | .bs-sidenav .nav .nav > li > a { 125 | padding-top: 3px; 126 | padding-bottom: 3px; 127 | padding-left: 30px; 128 | font-size: 90%; 129 | } 130 | 131 | .bs-sidenav .nav .nav .nav > li > a { 132 | padding-top: 3px; 133 | padding-bottom: 3px; 134 | padding-left: 40px; 135 | font-size: 90%; 136 | } 137 | 138 | .bs-sidenav .nav .nav .nav .nav > li > a { 139 | padding-top: 3px; 140 | padding-bottom: 3px; 141 | padding-left: 50px; 142 | font-size: 90%; 143 | } 144 | 145 | /* Show and affix the side nav when space allows it */ 146 | @media screen and (min-width: 992px) { 147 | .bs-sidenav .nav > .active > ul { 148 | display: block; 149 | } 150 | /* Widen the fixed sidenav */ 151 | .bs-sidenav.affix, 152 | .bs-sidenav.affix-bottom { 153 | width: 213px; 154 | } 155 | .bs-sidenav.affix { 156 | position: fixed; /* Undo the static from mobile first approach */ 157 | top: 80px; 158 | } 159 | .bs-sidenav.affix-bottom { 160 | 
position: absolute; /* Undo the static from mobile first approach */ 161 | } 162 | .bs-sidenav.affix-bottom .bs-sidenav, 163 | .bs-sidenav.affix .bs-sidenav { 164 | margin-top: 0; 165 | margin-bottom: 0; 166 | } 167 | } 168 | @media screen and (min-width: 1200px) { 169 | /* Widen the fixed sidenav again */ 170 | .bs-sidenav.affix-bottom, 171 | .bs-sidenav.affix { 172 | width: 263px; 173 | } 174 | } 175 | 176 | 177 | -------------------------------------------------------------------------------- /docs/_build/html/_static/bootstrap-sphinx.js: -------------------------------------------------------------------------------- 1 | (function ($) { 2 | /** 3 | * Patch TOC list. 4 | * 5 | * Will mutate the underlying span to have a correct ul for nav. 6 | * 7 | * @param $span: Span containing nested UL's to mutate. 8 | * @param minLevel: Starting level for nested lists. (1: global, 2: local). 9 | */ 10 | var patchToc = function ($ul, minLevel) { 11 | var findA, 12 | patchTables, 13 | $localLi; 14 | 15 | // Find all a "internal" tags, traversing recursively. 16 | findA = function ($elem, level) { 17 | level = level || 0; 18 | var $items = $elem.find("> li > a.internal, > ul, > li > ul"); 19 | 20 | // Iterate everything in order. 21 | $items.each(function (index, item) { 22 | var $item = $(item), 23 | tag = item.tagName.toLowerCase(), 24 | $childrenLi = $item.children('li'), 25 | $parentLi = $($item.parent('li'), $item.parent().parent('li')); 26 | 27 | // Add dropdowns if more children and above minimum level. 28 | if (tag === 'ul' && level >= minLevel && $childrenLi.length > 0) { 29 | $parentLi 30 | .addClass('dropdown-submenu') 31 | .children('a').first().attr('tabindex', -1); 32 | 33 | $item.addClass('dropdown-menu'); 34 | } 35 | 36 | findA($item, level + 1); 37 | }); 38 | }; 39 | 40 | findA($ul); 41 | }; 42 | 43 | /** 44 | * Patch all tables to remove ``docutils`` class and add Bootstrap base 45 | * ``table`` class. 
46 | */ 47 | patchTables = function () { 48 | $("table.docutils") 49 | .removeClass("docutils") 50 | .addClass("table") 51 | .attr("border", 0); 52 | }; 53 | 54 | $(window).load(function () { 55 | /* 56 | * Scroll the window to avoid the topnav bar 57 | * https://github.com/twitter/bootstrap/issues/1768 58 | */ 59 | if ($("#navbar.navbar-fixed-top").length > 0) { 60 | var navHeight = $("#navbar").height(), 61 | shiftWindow = function() { scrollBy(0, -navHeight - 10); }; 62 | 63 | if (location.hash) { 64 | setTimeout(shiftWindow, 1); 65 | } 66 | 67 | window.addEventListener("hashchange", shiftWindow); 68 | } 69 | }); 70 | 71 | $(document).ready(function () { 72 | // Add styling, structure to TOC's. 73 | $(".dropdown-menu").each(function () { 74 | $(this).find("ul").each(function (index, item){ 75 | var $item = $(item); 76 | $item.addClass('unstyled'); 77 | }); 78 | }); 79 | 80 | // Global TOC. 81 | if ($("ul.globaltoc li").length) { 82 | patchToc($("ul.globaltoc"), 1); 83 | } else { 84 | // Remove Global TOC. 85 | $(".globaltoc-container").remove(); 86 | } 87 | 88 | // Local TOC. 89 | $(".bs-sidenav ul").addClass("nav nav-list"); 90 | $(".bs-sidenav > ul > li > a").addClass("nav-header"); 91 | 92 | 93 | // back to top 94 | setTimeout(function () { 95 | var $sideBar = $('.bs-sidenav'); 96 | 97 | $sideBar.affix({ 98 | offset: { 99 | top: function () { 100 | var offsetTop = $sideBar.offset().top; 101 | var sideBarMargin = parseInt($sideBar.children(0).css('margin-top'), 10); 102 | var navOuterHeight = $('#navbar').height(); 103 | 104 | return (this.top = offsetTop - navOuterHeight - sideBarMargin); 105 | } 106 | , bottom: function () { 107 | // add 25 because the footer height doesn't seem to be enough 108 | return (this.bottom = $('.footer').outerHeight(true) + 25); 109 | } 110 | } 111 | }); 112 | }, 100); 113 | 114 | 115 | // Local TOC. 116 | patchToc($("ul.localtoc"), 2); 117 | 118 | // Mutate sub-lists (for bs-2.3.0). 
119 | $(".dropdown-menu ul").not(".dropdown-menu").each(function () { 120 | var $ul = $(this), 121 | $parent = $ul.parent(), 122 | tag = $parent[0].tagName.toLowerCase(), 123 | $kids = $ul.children().detach(); 124 | 125 | // Replace list with items if submenu header. 126 | if (tag === "ul") { 127 | $ul.replaceWith($kids); 128 | } else if (tag === "li") { 129 | // Insert into previous list. 130 | $parent.after($kids); 131 | $ul.remove(); 132 | } 133 | }); 134 | 135 | // Add divider in page TOC. 136 | $localLi = $("ul.localtoc li"); 137 | if ($localLi.length > 2) { 138 | $localLi.first().after('
  • '); 139 | } 140 | 141 | // Manually add dropdown. 142 | // Appears unnecessary as of: 143 | // https://github.com/ryan-roemer/sphinx-bootstrap-theme/pull/90 144 | // Remove next time around... 145 | // a.dropdown-toggle class needed in globaltoc.html 146 | //$('.dropdown-toggle').dropdown(); 147 | 148 | // Patch tables. 149 | patchTables(); 150 | 151 | // Add Note, Warning styles. (BS v2,3 compatible). 152 | $('.admonition').addClass('alert alert-info') 153 | .filter('.warning, .caution') 154 | .removeClass('alert-info') 155 | .addClass('alert-warning').end() 156 | .filter('.error, .danger') 157 | .removeClass('alert-info') 158 | .addClass('alert-danger alert-error').end(); 159 | 160 | // Inline code styles to Bootstrap style. 161 | $('tt.docutils.literal').not(".xref").each(function (i, e) { 162 | // ignore references 163 | if (!$(e).parent().hasClass("reference")) { 164 | $(e).replaceWith(function () { 165 | return $("").html($(this).html()); 166 | }); 167 | }}); 168 | 169 | // Update sourcelink to remove outerdiv (fixes appearance in navbar). 
170 | var $srcLink = $(".nav #sourcelink"); 171 | $srcLink.parent().html($srcLink.html()); 172 | }); 173 | }(window.$jqTheme || window.jQuery)); -------------------------------------------------------------------------------- /docs/_build/html/_static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/comment-bright.png -------------------------------------------------------------------------------- /docs/_build/html/_static/comment-close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/comment-close.png -------------------------------------------------------------------------------- /docs/_build/html/_static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/comment.png -------------------------------------------------------------------------------- /docs/_build/html/_static/default.css: -------------------------------------------------------------------------------- 1 | /* 2 | * default.css_t 3 | * ~~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- default theme. 6 | * 7 | * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 
9 | * 10 | */ 11 | 12 | @import url("basic.css"); 13 | 14 | /* -- page layout ----------------------------------------------------------- */ 15 | 16 | body { 17 | font-family: sans-serif; 18 | font-size: 100%; 19 | background-color: #11303d; 20 | color: #000; 21 | margin: 0; 22 | padding: 0; 23 | } 24 | 25 | div.document { 26 | background-color: #1c4e63; 27 | } 28 | 29 | div.documentwrapper { 30 | float: left; 31 | width: 100%; 32 | } 33 | 34 | div.bodywrapper { 35 | margin: 0 0 0 230px; 36 | } 37 | 38 | div.body { 39 | background-color: #ffffff; 40 | color: #000000; 41 | padding: 0 20px 30px 20px; 42 | } 43 | 44 | div.footer { 45 | color: #ffffff; 46 | width: 100%; 47 | padding: 9px 0 9px 0; 48 | text-align: center; 49 | font-size: 75%; 50 | } 51 | 52 | div.footer a { 53 | color: #ffffff; 54 | text-decoration: underline; 55 | } 56 | 57 | div.related { 58 | background-color: #133f52; 59 | line-height: 30px; 60 | color: #ffffff; 61 | } 62 | 63 | div.related a { 64 | color: #ffffff; 65 | } 66 | 67 | div.sphinxsidebar { 68 | } 69 | 70 | div.sphinxsidebar h3 { 71 | font-family: 'Trebuchet MS', sans-serif; 72 | color: #ffffff; 73 | font-size: 1.4em; 74 | font-weight: normal; 75 | margin: 0; 76 | padding: 0; 77 | } 78 | 79 | div.sphinxsidebar h3 a { 80 | color: #ffffff; 81 | } 82 | 83 | div.sphinxsidebar h4 { 84 | font-family: 'Trebuchet MS', sans-serif; 85 | color: #ffffff; 86 | font-size: 1.3em; 87 | font-weight: normal; 88 | margin: 5px 0 0 0; 89 | padding: 0; 90 | } 91 | 92 | div.sphinxsidebar p { 93 | color: #ffffff; 94 | } 95 | 96 | div.sphinxsidebar p.topless { 97 | margin: 5px 10px 10px 10px; 98 | } 99 | 100 | div.sphinxsidebar ul { 101 | margin: 10px; 102 | padding: 0; 103 | color: #ffffff; 104 | } 105 | 106 | div.sphinxsidebar a { 107 | color: #98dbcc; 108 | } 109 | 110 | div.sphinxsidebar input { 111 | border: 1px solid #98dbcc; 112 | font-family: sans-serif; 113 | font-size: 1em; 114 | } 115 | 116 | 117 | 118 | /* -- hyperlink styles 
------------------------------------------------------ */ 119 | 120 | a { 121 | color: #355f7c; 122 | text-decoration: none; 123 | } 124 | 125 | a:visited { 126 | color: #355f7c; 127 | text-decoration: none; 128 | } 129 | 130 | a:hover { 131 | text-decoration: underline; 132 | } 133 | 134 | 135 | 136 | /* -- body styles ----------------------------------------------------------- */ 137 | 138 | div.body h1, 139 | div.body h2, 140 | div.body h3, 141 | div.body h4, 142 | div.body h5, 143 | div.body h6 { 144 | font-family: 'Trebuchet MS', sans-serif; 145 | background-color: #f2f2f2; 146 | font-weight: normal; 147 | color: #20435c; 148 | border-bottom: 1px solid #ccc; 149 | margin: 20px -20px 10px -20px; 150 | padding: 3px 0 3px 10px; 151 | } 152 | 153 | div.body h1 { margin-top: 0; font-size: 200%; } 154 | div.body h2 { font-size: 160%; } 155 | div.body h3 { font-size: 140%; } 156 | div.body h4 { font-size: 120%; } 157 | div.body h5 { font-size: 110%; } 158 | div.body h6 { font-size: 100%; } 159 | 160 | a.headerlink { 161 | color: #c60f0f; 162 | font-size: 0.8em; 163 | padding: 0 4px 0 4px; 164 | text-decoration: none; 165 | } 166 | 167 | a.headerlink:hover { 168 | background-color: #c60f0f; 169 | color: white; 170 | } 171 | 172 | div.body p, div.body dd, div.body li { 173 | text-align: justify; 174 | line-height: 130%; 175 | } 176 | 177 | div.admonition p.admonition-title + p { 178 | display: inline; 179 | } 180 | 181 | div.admonition p { 182 | margin-bottom: 5px; 183 | } 184 | 185 | div.admonition pre { 186 | margin-bottom: 5px; 187 | } 188 | 189 | div.admonition ul, div.admonition ol { 190 | margin-bottom: 5px; 191 | } 192 | 193 | div.note { 194 | background-color: #eee; 195 | border: 1px solid #ccc; 196 | } 197 | 198 | div.seealso { 199 | background-color: #ffc; 200 | border: 1px solid #ff6; 201 | } 202 | 203 | div.topic { 204 | background-color: #eee; 205 | } 206 | 207 | div.warning { 208 | background-color: #ffe4e4; 209 | border: 1px solid #f66; 210 | } 211 | 212 
| p.admonition-title { 213 | display: inline; 214 | } 215 | 216 | p.admonition-title:after { 217 | content: ":"; 218 | } 219 | 220 | pre { 221 | padding: 5px; 222 | background-color: #eeffcc; 223 | color: #333333; 224 | line-height: 120%; 225 | border: 1px solid #ac9; 226 | border-left: none; 227 | border-right: none; 228 | } 229 | 230 | tt { 231 | background-color: #ecf0f3; 232 | padding: 0 1px 0 1px; 233 | font-size: 0.95em; 234 | } 235 | 236 | th { 237 | background-color: #ede; 238 | } 239 | 240 | .warning tt { 241 | background: #efc2c2; 242 | } 243 | 244 | .note tt { 245 | background: #d6d6d6; 246 | } 247 | 248 | .viewcode-back { 249 | font-family: sans-serif; 250 | } 251 | 252 | div.viewcode-block:target { 253 | background-color: #f4debf; 254 | border-top: 1px solid #ac9; 255 | border-bottom: 1px solid #ac9; 256 | } -------------------------------------------------------------------------------- /docs/_build/html/_static/doctools.js: -------------------------------------------------------------------------------- 1 | /* 2 | * doctools.js 3 | * ~~~~~~~~~~~ 4 | * 5 | * Sphinx JavaScript utilities for all documentation. 6 | * 7 | * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 
9 | * 10 | */ 11 | 12 | /** 13 | * select a different prefix for underscore 14 | */ 15 | $u = _.noConflict(); 16 | 17 | /** 18 | * make the code below compatible with browsers without 19 | * an installed firebug like debugger 20 | if (!window.console || !console.firebug) { 21 | var names = ["log", "debug", "info", "warn", "error", "assert", "dir", 22 | "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace", 23 | "profile", "profileEnd"]; 24 | window.console = {}; 25 | for (var i = 0; i < names.length; ++i) 26 | window.console[names[i]] = function() {}; 27 | } 28 | */ 29 | 30 | /** 31 | * small helper function to urldecode strings 32 | */ 33 | jQuery.urldecode = function(x) { 34 | return decodeURIComponent(x).replace(/\+/g, ' '); 35 | }; 36 | 37 | /** 38 | * small helper function to urlencode strings 39 | */ 40 | jQuery.urlencode = encodeURIComponent; 41 | 42 | /** 43 | * This function returns the parsed url parameters of the 44 | * current request. Multiple values per key are supported, 45 | * it will always return arrays of strings for the value parts. 46 | */ 47 | jQuery.getQueryParameters = function(s) { 48 | if (typeof s == 'undefined') 49 | s = document.location.search; 50 | var parts = s.substr(s.indexOf('?') + 1).split('&'); 51 | var result = {}; 52 | for (var i = 0; i < parts.length; i++) { 53 | var tmp = parts[i].split('=', 2); 54 | var key = jQuery.urldecode(tmp[0]); 55 | var value = jQuery.urldecode(tmp[1]); 56 | if (key in result) 57 | result[key].push(value); 58 | else 59 | result[key] = [value]; 60 | } 61 | return result; 62 | }; 63 | 64 | /** 65 | * highlight a given string on a jquery object by wrapping it in 66 | * span elements with the given class name. 
67 | */ 68 | jQuery.fn.highlightText = function(text, className) { 69 | function highlight(node) { 70 | if (node.nodeType == 3) { 71 | var val = node.nodeValue; 72 | var pos = val.toLowerCase().indexOf(text); 73 | if (pos >= 0 && !jQuery(node.parentNode).hasClass(className)) { 74 | var span = document.createElement("span"); 75 | span.className = className; 76 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 77 | node.parentNode.insertBefore(span, node.parentNode.insertBefore( 78 | document.createTextNode(val.substr(pos + text.length)), 79 | node.nextSibling)); 80 | node.nodeValue = val.substr(0, pos); 81 | } 82 | } 83 | else if (!jQuery(node).is("button, select, textarea")) { 84 | jQuery.each(node.childNodes, function() { 85 | highlight(this); 86 | }); 87 | } 88 | } 89 | return this.each(function() { 90 | highlight(this); 91 | }); 92 | }; 93 | 94 | /** 95 | * Small JavaScript module for the documentation. 96 | */ 97 | var Documentation = { 98 | 99 | init : function() { 100 | this.fixFirefoxAnchorBug(); 101 | this.highlightSearchWords(); 102 | this.initIndexTable(); 103 | }, 104 | 105 | /** 106 | * i18n support 107 | */ 108 | TRANSLATIONS : {}, 109 | PLURAL_EXPR : function(n) { return n == 1 ? 0 : 1; }, 110 | LOCALE : 'unknown', 111 | 112 | // gettext and ngettext don't access this so that the functions 113 | // can safely bound to a different name (_ = Documentation.gettext) 114 | gettext : function(string) { 115 | var translated = Documentation.TRANSLATIONS[string]; 116 | if (typeof translated == 'undefined') 117 | return string; 118 | return (typeof translated == 'string') ? translated : translated[0]; 119 | }, 120 | 121 | ngettext : function(singular, plural, n) { 122 | var translated = Documentation.TRANSLATIONS[singular]; 123 | if (typeof translated == 'undefined') 124 | return (n == 1) ? 
singular : plural; 125 | return translated[Documentation.PLURALEXPR(n)]; 126 | }, 127 | 128 | addTranslations : function(catalog) { 129 | for (var key in catalog.messages) 130 | this.TRANSLATIONS[key] = catalog.messages[key]; 131 | this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')'); 132 | this.LOCALE = catalog.locale; 133 | }, 134 | 135 | /** 136 | * add context elements like header anchor links 137 | */ 138 | addContextElements : function() { 139 | $('div[id] > :header:first').each(function() { 140 | $('\u00B6'). 141 | attr('href', '#' + this.id). 142 | attr('title', _('Permalink to this headline')). 143 | appendTo(this); 144 | }); 145 | $('dt[id]').each(function() { 146 | $('\u00B6'). 147 | attr('href', '#' + this.id). 148 | attr('title', _('Permalink to this definition')). 149 | appendTo(this); 150 | }); 151 | }, 152 | 153 | /** 154 | * workaround a firefox stupidity 155 | */ 156 | fixFirefoxAnchorBug : function() { 157 | if (document.location.hash && $.browser.mozilla) 158 | window.setTimeout(function() { 159 | document.location.href += ''; 160 | }, 10); 161 | }, 162 | 163 | /** 164 | * highlight the search words provided in the url in the text 165 | */ 166 | highlightSearchWords : function() { 167 | var params = $.getQueryParameters(); 168 | var terms = (params.highlight) ? 
params.highlight[0].split(/\s+/) : []; 169 | if (terms.length) { 170 | var body = $('div.body'); 171 | if (!body.length) { 172 | body = $('body'); 173 | } 174 | window.setTimeout(function() { 175 | $.each(terms, function() { 176 | body.highlightText(this.toLowerCase(), 'highlighted'); 177 | }); 178 | }, 10); 179 | $('') 181 | .appendTo($('#searchbox')); 182 | } 183 | }, 184 | 185 | /** 186 | * init the domain index toggle buttons 187 | */ 188 | initIndexTable : function() { 189 | var togglers = $('img.toggler').click(function() { 190 | var src = $(this).attr('src'); 191 | var idnum = $(this).attr('id').substr(7); 192 | $('tr.cg-' + idnum).toggle(); 193 | if (src.substr(-9) == 'minus.png') 194 | $(this).attr('src', src.substr(0, src.length-9) + 'plus.png'); 195 | else 196 | $(this).attr('src', src.substr(0, src.length-8) + 'minus.png'); 197 | }).css('display', ''); 198 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) { 199 | togglers.click(); 200 | } 201 | }, 202 | 203 | /** 204 | * helper function to hide the search marks again 205 | */ 206 | hideSearchWords : function() { 207 | $('#searchbox .highlight-link').fadeOut(300); 208 | $('span.highlighted').removeClass('highlighted'); 209 | }, 210 | 211 | /** 212 | * make the url absolute 213 | */ 214 | makeURL : function(relativeURL) { 215 | return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL; 216 | }, 217 | 218 | /** 219 | * get the current relative url 220 | */ 221 | getCurrentURL : function() { 222 | var path = document.location.pathname; 223 | var parts = path.split(/\//); 224 | $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() { 225 | if (this == '..') 226 | parts.pop(); 227 | }); 228 | var url = parts.join('/'); 229 | return path.substring(url.lastIndexOf('/') + 1, path.length - 1); 230 | } 231 | }; 232 | 233 | // quick alias for translations 234 | _ = Documentation.gettext; 235 | 236 | $(document).ready(function() { 237 | Documentation.init(); 238 | }); 239 | 
-------------------------------------------------------------------------------- /docs/_build/html/_static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/down-pressed.png -------------------------------------------------------------------------------- /docs/_build/html/_static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/down.png -------------------------------------------------------------------------------- /docs/_build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/file.png -------------------------------------------------------------------------------- /docs/_build/html/_static/js/jquery-fix.js: -------------------------------------------------------------------------------- 1 | // No Conflict in later (our) version of jQuery 2 | window.$jqTheme = jQuery.noConflict(true); -------------------------------------------------------------------------------- /docs/_build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/_build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/_static/plus.png 
-------------------------------------------------------------------------------- /docs/_build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 8 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 9 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ 10 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 11 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 12 | .highlight .ge { font-style: italic } /* Generic.Emph */ 13 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 14 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 15 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 16 | .highlight .go { color: #333333 } /* Generic.Output */ 17 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 18 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 19 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 20 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 21 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 22 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 23 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 24 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 25 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 26 | .highlight .kt { color: #902000 } /* Keyword.Type */ 27 | .highlight .m { 
color: #208050 } /* Literal.Number */ 28 | .highlight .s { color: #4070a0 } /* Literal.String */ 29 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 30 | .highlight .nb { color: #007020 } /* Name.Builtin */ 31 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 32 | .highlight .no { color: #60add5 } /* Name.Constant */ 33 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 34 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ 35 | .highlight .ne { color: #007020 } /* Name.Exception */ 36 | .highlight .nf { color: #06287e } /* Name.Function */ 37 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 38 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 39 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 40 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 41 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 42 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 43 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 44 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 45 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 46 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 47 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 48 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 49 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 50 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 51 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 52 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 53 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 54 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 55 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 56 | .highlight .s1 { color: 
#4070a0 } /* Literal.String.Single */ 57 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 58 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 59 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 60 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 61 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 62 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/_build/html/_static/sidebar.js: -------------------------------------------------------------------------------- 1 | /* 2 | * sidebar.js 3 | * ~~~~~~~~~~ 4 | * 5 | * This script makes the Sphinx sidebar collapsible. 6 | * 7 | * .sphinxsidebar contains .sphinxsidebarwrapper. This script adds 8 | * in .sphixsidebar, after .sphinxsidebarwrapper, the #sidebarbutton 9 | * used to collapse and expand the sidebar. 10 | * 11 | * When the sidebar is collapsed the .sphinxsidebarwrapper is hidden 12 | * and the width of the sidebar and the margin-left of the document 13 | * are decreased. When the sidebar is expanded the opposite happens. 14 | * This script saves a per-browser/per-session cookie used to 15 | * remember the position of the sidebar among the pages. 16 | * Once the browser is closed the cookie is deleted and the position 17 | * reset to the default (expanded). 18 | * 19 | * :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS. 20 | * :license: BSD, see LICENSE for details. 21 | * 22 | */ 23 | 24 | $(function() { 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | // global elements used by the functions. 
34 | // the 'sidebarbutton' element is defined as global after its 35 | // creation, in the add_sidebar_button function 36 | var bodywrapper = $('.bodywrapper'); 37 | var sidebar = $('.sphinxsidebar'); 38 | var sidebarwrapper = $('.sphinxsidebarwrapper'); 39 | 40 | // for some reason, the document has no sidebar; do not run into errors 41 | if (!sidebar.length) return; 42 | 43 | // original margin-left of the bodywrapper and width of the sidebar 44 | // with the sidebar expanded 45 | var bw_margin_expanded = bodywrapper.css('margin-left'); 46 | var ssb_width_expanded = sidebar.width(); 47 | 48 | // margin-left of the bodywrapper and width of the sidebar 49 | // with the sidebar collapsed 50 | var bw_margin_collapsed = '.8em'; 51 | var ssb_width_collapsed = '.8em'; 52 | 53 | // colors used by the current theme 54 | var dark_color = $('.related').css('background-color'); 55 | var light_color = $('.document').css('background-color'); 56 | 57 | function sidebar_is_collapsed() { 58 | return sidebarwrapper.is(':not(:visible)'); 59 | } 60 | 61 | function toggle_sidebar() { 62 | if (sidebar_is_collapsed()) 63 | expand_sidebar(); 64 | else 65 | collapse_sidebar(); 66 | } 67 | 68 | function collapse_sidebar() { 69 | sidebarwrapper.hide(); 70 | sidebar.css('width', ssb_width_collapsed); 71 | bodywrapper.css('margin-left', bw_margin_collapsed); 72 | sidebarbutton.css({ 73 | 'margin-left': '0', 74 | 'height': bodywrapper.height() 75 | }); 76 | sidebarbutton.find('span').text('»'); 77 | sidebarbutton.attr('title', _('Expand sidebar')); 78 | document.cookie = 'sidebar=collapsed'; 79 | } 80 | 81 | function expand_sidebar() { 82 | bodywrapper.css('margin-left', bw_margin_expanded); 83 | sidebar.css('width', ssb_width_expanded); 84 | sidebarwrapper.show(); 85 | sidebarbutton.css({ 86 | 'margin-left': ssb_width_expanded-12, 87 | 'height': bodywrapper.height() 88 | }); 89 | sidebarbutton.find('span').text('«'); 90 | sidebarbutton.attr('title', _('Collapse sidebar')); 91 | 
document.cookie = 'sidebar=expanded'; 92 | } 93 | 94 | function add_sidebar_button() { 95 | sidebarwrapper.css({ 96 | 'float': 'left', 97 | 'margin-right': '0', 98 | 'width': ssb_width_expanded - 28 99 | }); 100 | // create the button 101 | sidebar.append( 102 | '
    «
    ' 103 | ); 104 | var sidebarbutton = $('#sidebarbutton'); 105 | light_color = sidebarbutton.css('background-color'); 106 | // find the height of the viewport to center the '<<' in the page 107 | var viewport_height; 108 | if (window.innerHeight) 109 | viewport_height = window.innerHeight; 110 | else 111 | viewport_height = $(window).height(); 112 | sidebarbutton.find('span').css({ 113 | 'display': 'block', 114 | 'margin-top': (viewport_height - sidebar.position().top - 20) / 2 115 | }); 116 | 117 | sidebarbutton.click(toggle_sidebar); 118 | sidebarbutton.attr('title', _('Collapse sidebar')); 119 | sidebarbutton.css({ 120 | 'color': '#FFFFFF', 121 | 'border-left': '1px solid ' + dark_color, 122 | 'font-size': '1.2em', 123 | 'cursor': 'pointer', 124 | 'height': bodywrapper.height(), 125 | 'padding-top': '1px', 126 | 'margin-left': ssb_width_expanded - 12 127 | }); 128 | 129 | sidebarbutton.hover( 130 | function () { 131 | $(this).css('background-color', dark_color); 132 | }, 133 | function () { 134 | $(this).css('background-color', light_color); 135 | } 136 | ); 137 | } 138 | 139 | function set_position_from_cookie() { 140 | if (!document.cookie) 141 | return; 142 | var items = document.cookie.split(';'); 143 | for(var k=0; k 2 | 3 | 4 | 5 | 6 | 7 | 8 | config package — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 136 | 137 |
    138 |
    139 |
    140 | 141 |
    142 |

    config package

    143 |
    144 |

    Submodules

    145 |
    146 |
    147 |

    config.settings module

    148 |
    149 |
    150 |

    Module contents

    151 |
    152 |
    153 | 154 | 155 |
    156 | 157 |
    158 |
    159 |
    160 |
    161 |

    162 | Back to top 163 | 164 |

    165 |

    166 | © Copyright 2014, Author.
    167 | Created using Sphinx 1.2.1.
    168 |

    169 |
    170 |
    171 | 172 | -------------------------------------------------------------------------------- /docs/_build/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Welcome to Grimlock's documentation! 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 127 | 128 |
    129 |
    130 |
    131 | 132 |
    133 |

    Welcome to Grimlock's documentation!

    134 |

    Contents:

    135 |
    136 | 153 |
    154 |
    155 |
    156 |

    Indices and tables

    157 | 162 |
    163 | 164 | 165 |
    166 | 167 |
    168 |
    169 |
    170 |
    171 |

    172 | Back to top 173 | 174 |

    175 |

    176 | © Copyright 2014, Author.
    177 | Created using Sphinx 1.2.1.
    178 |

    179 |
    180 |
    181 | 182 | -------------------------------------------------------------------------------- /docs/_build/html/modules.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Grimlock — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 121 | 122 |
    123 |
    124 |
    125 | 126 |
    127 |

    Grimlock

    128 |
    129 | 154 |
    155 |
    156 | 157 | 158 |
    159 | 160 |
    161 |
    162 |
    163 |
    164 |

    165 | Back to top 166 | 167 |

    168 |

    169 | © Copyright 2014, Author.
    170 | Created using Sphinx 1.2.1.
    171 |

    172 |
    173 |
    174 | 175 | -------------------------------------------------------------------------------- /docs/_build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/docs/_build/html/objects.inv -------------------------------------------------------------------------------- /docs/_build/html/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Search — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 115 | 116 |
    117 |
    118 |
    119 | 120 |

    Search

    121 |
    122 | 123 |

    124 | Please activate JavaScript to enable the search 125 | functionality. 126 |

    127 |
    128 |

    129 | From here you can search these documents. Enter your search 130 | words into the box below and click "search". Note that the search 131 | function will automatically search for all of the words. Pages 132 | containing fewer words won't appear in the result list. 133 |

    134 | 135 | 136 |
    137 |
    138 | 139 |
    140 | 141 | 142 |
    143 | 144 | 145 | 146 |
    147 | 148 |
    149 | 150 |
    151 | 152 |
    153 |
    154 |
    155 |
    156 |

    157 | Back to top 158 | 159 |

    160 |

    161 | © Copyright 2014, Author.
    162 | Created using Sphinx 1.2.1.
    163 |

    164 |
    165 |
    166 | 167 | -------------------------------------------------------------------------------- /docs/_build/html/searchindex.js: -------------------------------------------------------------------------------- 1 | Search.setIndex({envversion:42,terms:{composit:[7,1],chain:[7,1],consum:[7,1],through:[7,4],follow:[7,1],paramet:[7,1],onli:[2,3],execut:[7,1,4],singl:[7,1,2,4,3],should:[7,4],requir:[2,3],save:[7,4],func2:[7,1],func1:[7,1],sourc:[7,1,2,4,3],"return":[7,1,2,4,3],downstream:[2,3],initi:[7,1],returnvaluefromfunc1:[7,1],laros:[7,1],term:[7,1],revers:[7,1],list:[7,1],borrow:[7,1],item:[7,4],either:[7,4],each:[7,1,4],where:[7,1],page:0,pass:[7,1,4],our:[7,1],index:0,what:[7,1],databas:[7,4],version:[7,4],"import":[7,1],ever:[7,1],method:[7,4,2,3],math:[7,1],whatev:[2,3],previou:[7,1],run:[7,4,2,3],insid:[2,3],job:[7,4],base:[7,4],modif:[2,3],modifi:[7,4],valu:[7,1,4],search:0,just:[2,3],queue:[7,4],mani:[7,1],set_pipeline_step:[7,4],produc:[7,1],first:[7,1],feed:[7,1,4],arrai:[],prefer:[7,1],transform:[2,3],poll:[7,4],your:[2,3],mathieu:[7,1],from:[7,1,4],compos:[7,1],start:[7,4],call:[7,1,4],subscrib:[7,4],friend:[7,1],listen:[7,4],"function":[7,1,4],task1:[7,1],task2:[7,1],worker:[7,4],must:[7,1],retriev:[7,4],augment:[2,3],work:[7,4],structur:[2,3],defin:[7,1,4],can:[7,1,2,3],def:[7,1],underscorej:[7,1],want:[7,1,2,3],process:[7,1,4],arg:[],argument:[7,4,2,3],accept:[7,1,2,4,3],good:[7,1],example2:[],have:[7,4],further:[2,3],probabl:[7,4],receiv:[7,4],make:[2,3],which:[7,4],you:[7,1,2,3],channel:[7,4],updat:[2,3],pipelin:[],sequenc:[7,1],object:[7,4],datastor:[2,3],data:[7,1,2,4,3],"class":[7,4],subpackag:[],task:[],takeawai:[7,1],liner:[7,1],exampl:[],thi:[2,3],order:[7,1,4]},objtypes:{"0":"py:module","1":"py:method","2":"py:function","3":"py:class"},objnames:{"0":["py","module","Python module"],"1":["py","method","Python method"],"2":["py","function","Python function"],"3":["py","class","Python 
class"]},filenames:["index","pipeline","tasks","src.tasks","app","modules","src.config","src","config"],titles:["Welcome to src’s documentation!","pipeline module","tasks package","src.tasks package","app module","Grimlock","src.config package","src package","config package"],objects:{"":{src:[7,0,0,"-"],pipeline:[1,0,0,"-"],tasks:[2,0,0,"-"],config:[8,0,0,"-"],app:[4,0,0,"-"]},src:{app:[7,0,0,"-"],tasks:[3,0,0,"-"],config:[6,0,0,"-"],pipeline:[7,0,0,"-"]},pipeline:{process:[1,2,1,""],compose:[1,2,1,""]},tasks:{example2:[2,0,0,"-"],example:[2,0,0,"-"]},"src.app":{source:[7,2,1,""],App:[7,3,1,""],set_pipeline_steps:[7,2,1,""]},"tasks.example2":{run:[2,2,1,""]},"src.tasks":{example2:[3,0,0,"-"],example:[3,0,0,"-"]},"app.App":{start:[4,1,1,""],work:[4,1,1,""]},app:{source:[4,2,1,""],App:[4,3,1,""],set_pipeline_steps:[4,2,1,""]},"src.tasks.example2":{run:[3,2,1,""]},"tasks.example":{run:[2,2,1,""]},"src.tasks.example":{run:[3,2,1,""]},"src.config":{settings:[6,0,0,"-"]},config:{settings:[8,0,0,"-"]},"src.app.App":{start:[7,1,1,""],work:[7,1,1,""]},"src.pipeline":{process:[7,2,1,""],compose:[7,2,1,""]}},titleterms:{subpackag:7,src:[6,0,7,3],pipelin:[7,1],set:[6,8],grimlock:5,welcom:0,app:[7,4],submodul:[6,7,2,8,3],indic:0,task:[2,3],content:[6,7,2,8,3],packag:[6,7,2,8,3],exampl:[2,3],example2:[2,3],tabl:0,document:0,config:[6,8],modul:[7,1,2,3,4,6,8]}}) -------------------------------------------------------------------------------- /docs/_build/html/src.config.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | src.config package — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 126 | 127 |
    128 |
    129 |
    130 | 131 |
    132 |

    src.config package

    133 |
    134 |

    Submodules

    135 |
    136 |
    137 |

    src.config.settings module

    138 |
    139 |
    140 |

    Module contents

    141 |
    142 |
    143 | 144 | 145 |
    146 | 147 |
    148 |
    149 |
    150 |
    151 |

    152 | Back to top 153 | 154 |

    155 |

    156 | © Copyright 2014, Author.
    157 | Created using Sphinx 1.2.1.
    158 |

    159 |
    160 |
    161 | 162 | -------------------------------------------------------------------------------- /docs/_build/html/src.tasks.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | src.tasks package — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 127 | 128 |
    129 |
    130 |
    131 | 132 |
    133 |

    src.tasks package

    134 |
    135 |

    Submodules

    136 |
    137 |
    138 |

    src.tasks.example module

    139 |

    This is just an example. Inside the run method of a task you can do whatever 140 | you want. The only requirements are that your run method accepts a single 141 | argument - the structure to be transformed or augmented - and returns the 142 | transformed/augmented structure so that downstream tasks can make further 143 | modifications or update the document in the datastore.

    144 |
    145 |
    146 | src.tasks.example.run(data)[source]
    147 |
    148 | 149 |
    150 |
    151 |

    src.tasks.example2 module

    152 |
    153 |
    154 | src.tasks.example2.run(data)[source]
    155 |
    156 | 157 |
    158 |
    159 |

    Module contents

    160 |
    161 |
    162 | 163 | 164 |
    165 | 166 |
    167 |
    168 |
    169 |
    170 |

    171 | Back to top 172 | 173 |

    174 |

    175 | © Copyright 2014, Author.
    176 | Created using Sphinx 1.2.1.
    177 |

    178 |
    179 |
    180 | 181 | -------------------------------------------------------------------------------- /docs/_build/html/tasks.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | tasks package — Grimlock 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 132 | 133 |
    134 |
    135 |
    136 | 137 |
    138 |

    tasks package

    139 |
    140 |

    Submodules

    141 |
    142 |
    143 |

    tasks.example module

    144 |

    This is just an example. Inside the run method of a task you can do whatever 145 | you want. The only requirements are that your run method accepts a single 146 | argument - the structure to be transformed or augmented - and returns the 147 | transformed/augmented structure so that downstream tasks can make further 148 | modifications or update the document in the datastore.

    149 |
    150 |
    151 | tasks.example.run(data)[source]
    152 |
    153 | 154 |
    155 |
    156 |

    tasks.example2 module

    157 |
    158 |
    159 | tasks.example2.run(data)[source]
    160 |
    161 | 162 |
    163 |
    164 |

    Module contents

    165 |
    166 |
    167 | 168 | 169 |
    170 | 171 |
    172 |
    173 |
    174 |
    175 |

    176 | Back to top 177 | 178 |

    179 |

    180 | © Copyright 2014, Author.
    181 | Created using Sphinx 1.2.1.
    182 |

    183 |
    184 |
    185 | 186 | -------------------------------------------------------------------------------- /docs/app.rst: -------------------------------------------------------------------------------- 1 | app module 2 | ========== 3 | 4 | .. automodule:: app 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | config package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | config.settings module 8 | ---------------------- 9 | 10 | .. automodule:: config.settings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: config 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. src documentation master file, created by 2 | sphinx-quickstart on Sat Feb 15 20:43:34 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Grimlock's documentation! 7 | =============================== 8 | 9 | Contents: 10 | 11 | .. 
toctree:: 12 | :maxdepth: 4 13 | 14 | app 15 | config 16 | pipeline 17 | tasks 18 | 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | 27 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. 
doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 
107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\src.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\src.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 
173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 
239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | Grimlock 2 | ======== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | src 8 | -------------------------------------------------------------------------------- /docs/pipeline.rst: -------------------------------------------------------------------------------- 1 | pipeline module 2 | =============== 3 | 4 | .. automodule:: pipeline 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/src.config.rst: -------------------------------------------------------------------------------- 1 | src.config package 2 | ================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | src.config.settings module 8 | -------------------------- 9 | 10 | .. automodule:: src.config.settings 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: src.config 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/src.rst: -------------------------------------------------------------------------------- 1 | src package 2 | =========== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | src.config 10 | src.tasks 11 | 12 | Submodules 13 | ---------- 14 | 15 | src.app module 16 | -------------- 17 | 18 | .. automodule:: src.app 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | src.pipeline module 24 | ------------------- 25 | 26 | .. automodule:: src.pipeline 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. 
automodule:: src 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/src.tasks.rst: -------------------------------------------------------------------------------- 1 | src.tasks package 2 | ================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | src.tasks.example module 8 | ------------------------ 9 | 10 | .. automodule:: src.tasks.example 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | src.tasks.example2 module 16 | ------------------------- 17 | 18 | .. automodule:: src.tasks.example2 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: src.tasks 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/tasks.rst: -------------------------------------------------------------------------------- 1 | tasks package 2 | ============= 3 | 4 | Submodules 5 | ---------- 6 | 7 | tasks.example module 8 | -------------------- 9 | 10 | .. automodule:: tasks.example 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | tasks.example2 module 16 | --------------------- 17 | 18 | .. automodule:: tasks.example2 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: tasks 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /etc/grimlock.conf: -------------------------------------------------------------------------------- 1 | description "transformer" 2 | start on runlevel [2345] 3 | stop on runlevel [!2345] 4 | script 5 | GRIMLOCK=staging 6 | export GRIMLOCK 7 | exec /home/crisisnet/grimlock/venv/bin/python /home/crisisnet/grimlock/src/app.py --logto /var/log/grimlock.log 8 | end script 9 | respawn -------------------------------------------------------------------------------- /etc/grimlock_prod.conf: -------------------------------------------------------------------------------- 1 | instance $INST 2 | 3 | description "transformer" 4 | start on runlevel [2345] 5 | stop on runlevel [!2345] 6 | script 7 | GRIMLOCK=production 8 | export GRIMLOCK 9 | exec /home/crisisnet/grimlock/venv/bin/python /home/crisisnet/grimlock/src/app.py --logto /var/log/grimlock.log 10 | end script 11 | respawn -------------------------------------------------------------------------------- /fabfile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import deploy_config 4 | 5 | # Add current directory to path. 
6 | local_dir = os.path.dirname(__file__) 7 | sys.path.append(local_dir) 8 | 9 | from fabric.api import * 10 | 11 | path = '/home/crisisnet/grimlock' 12 | venv = '/home/crisisnet/grimlock/venv' 13 | release_file = '/home/crisisnet/releases.grimlock' 14 | 15 | @task 16 | def staging(): 17 | env.host_string = deploy_config.STAGING_HOST 18 | env.user = deploy_config.STAGING_USER 19 | env.password = deploy_config.STAGING_PASSWORD 20 | env.key_filename = '' 21 | env.branch = 'development' 22 | env.upstart_script = 'grimlock' 23 | env.settings_file = 'staging_settings.py' 24 | env.app_env = 'staging' 25 | env.num_workers = 1 26 | 27 | 28 | @task 29 | def production(): 30 | env.host_string = deploy_config.PROD_HOST 31 | env.user = deploy_config.PROD_USER 32 | env.password = deploy_config.PROD_PASSWORD 33 | env.branch = 'master' 34 | env.upstart_script = 'grimlock_prod' 35 | env.settings_file = 'production_settings.py' 36 | env.app_env = 'production' 37 | env.port = 15922 38 | env.num_workers = 4 39 | 40 | 41 | def install_deps(): 42 | """ 43 | Installs os and base packages. 44 | """ 45 | deps = ['build-essential python-dev python-pip libevent-dev libpq-dev libxml2-dev libxslt1-dev git'] 46 | for dep in deps: 47 | sudo('apt-get install -y %s' % dep) 48 | sudo('pip install virtualenv') 49 | 50 | 51 | def check_upstart(): 52 | """ 53 | Checks if uwsgi upstart exists; if not, upstart job is created. 54 | If it exists and is different from the checked-in version, it's updated. 55 | """ 56 | conf = env.upstart_script+'.conf' 57 | sudo('test -f /etc/init/'+conf+' || cp etc/'+conf+' /etc/init') 58 | sudo('diff etc/'+conf+' /etc/init/'+conf+' || cp etc/'+conf+' /etc/init') 59 | 60 | 61 | @task 62 | @parallel 63 | def deploy(branch=None): 64 | branch = branch or env.branch 65 | install_deps() 66 | # Check for first deploy. 67 | run("test -d %s || git clone https://github.com/ushahidi/grimlock.git %s" % (path, path)) 68 | 69 | # Check for virtualenv. 
70 | run('test -d %s || virtualenv %s' % (venv, venv)) 71 | 72 | with cd(path): 73 | #run('git branch --set-upstream %s origin/%s' % (branch, branch)) 74 | do_release(branch) 75 | record_release() 76 | 77 | 78 | 79 | def copy_private_files(): 80 | """ 81 | Files that we shouldn't include in the public repo because they contain 82 | sensitive information (third-party service API keys, db connect info, etc) 83 | """ 84 | settings_file = '/src/config/' + env.settings_file 85 | put(local_dir + settings_file,path + settings_file,mirror_local_mode=True) 86 | 87 | 88 | def do_release(branch): 89 | run('git fetch') 90 | run('git checkout %s && git pull' % branch) 91 | with prefix('source %s/bin/activate' % venv): 92 | run('pip install -r requirements.txt') 93 | run('python -m nltk.downloader maxent_ne_chunker') 94 | run('python -m nltk.downloader words') 95 | run('python -m nltk.downloader treebank') 96 | run('python -m nltk.downloader maxent_treebank_pos_tagger') 97 | copy_private_files() 98 | check_upstart() 99 | 100 | for i in range(env.num_workers): 101 | sudo('service '+env.upstart_script+' stop INST='+str(i)+'; service '+env.upstart_script+' start INST='+str(i)+' GRIMLOCK='+env.app_env) 102 | 103 | 104 | def record_release(): 105 | """ 106 | Records the git commit version so that we can rollback. 107 | """ 108 | current_release = run("git rev-parse HEAD") 109 | # Note that this uses warn_only kwarg which will still fail in older 110 | # versions of fabric. 111 | last_release = run("tail -n 1 %s" % release_file, warn_only=True) 112 | if last_release.failed: 113 | run("echo %s > %s" % (current_release, release_file)) 114 | elif current_release != last_release: 115 | run("echo %s >> %s" % (current_release, release_file)) 116 | 117 | 118 | @task 119 | @parallel 120 | def rollback(num=1): 121 | """ 122 | Rollsback git version to a previous release. 
123 | """ 124 | num = num + 1 125 | with cd(path): 126 | release_version = run("tail -n %s %s | head -n 1" % (num, release_file)) 127 | run('git checkout %s' % release_version) 128 | do_release() 129 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | redis==2.9.1 2 | Sphinx==1.2.1 3 | sphinx-bootstrap-theme==0.3.9 4 | qr==0.6.0 5 | mongokit==0.9.1.1 6 | nose==1.3.0 7 | requests==2.2.1 8 | langid==1.1.4dev 9 | geograpy==0.3.7 10 | elasticsearch==1.0.0 11 | jellyfish==0.2.2 12 | microsofttranslator==0.4 13 | goslate==1.2.0 14 | newspaper==0.0.6 15 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/src/__init__.py -------------------------------------------------------------------------------- /src/app.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | import sys 4 | import json 5 | from datetime import datetime 6 | from qr import Queue 7 | from config import settings 8 | from pipeline import process 9 | from tasks import (geocode, format_address, update_doc, identify_language, 10 | add_default_values, reverse_geocode, extract_place, translate_content, 11 | relevance_classifier, extract_content, donation_classifier, image_tagger) 12 | from cn_store_py.connect import get_connection 13 | from cn_search_py.connect import (setup_indexes, 14 | get_connection as get_search_connection) 15 | from cn_search_py.collections import ItemCollection 16 | 17 | from bson import objectid 18 | 19 | logger = logging.getLogger('grimlock') 20 | 21 | 22 | def source(item_collection, doc_id): 23 | """ Returns the function that will be called to feed data into the 24 
| pipeline. 25 | 26 | """ 27 | def get_doc(): 28 | search_params = [ 29 | { 30 | 'field':'_id', 31 | 'value': doc_id 32 | } 33 | ] 34 | #print "Processing doc " + str(id) 35 | 36 | # try 37 | # except does not exist 38 | # refresh and try again 39 | 40 | doc = item_collection.get(search_params) 41 | return doc 42 | #return db.Item.find_one() 43 | 44 | return get_doc 45 | 46 | 47 | def set_pipeline_steps(steps, **kwargs): 48 | """ Define the order in which tasks should be executed in the pipeline. Each 49 | task module should have a `run` method, which accepts a single argument 50 | and either returns a value (probably a modified version of the object it 51 | received) or saves to the database. 52 | 53 | """ 54 | 55 | return [mod.setup(**kwargs) if hasattr(mod, 'setup') else mod.run for mod in steps] 56 | 57 | 58 | default_tasks = [ 59 | add_default_values, 60 | extract_content, 61 | image_tagger, 62 | identify_language, 63 | translate_content, 64 | extract_place, 65 | relevance_classifier, 66 | #donation_classifier, 67 | format_address, 68 | geocode, 69 | reverse_geocode, 70 | update_doc 71 | ] 72 | 73 | class App(object): 74 | """ Polls the queue and runs each received job through the processing 75 | pipeline. 76 | 77 | """ 78 | def __init__(self, queue_name, pipeline_steps = default_tasks): 79 | """ Init redis pubsub and subscribe to the appropriate channels. 
80 | 81 | Args: 82 | r (redis.Redis): connected redis instance 83 | channels (array): string names of channels to which we should subscribe 84 | """ 85 | 86 | self.queue = Queue(queue_name, host=settings.REDIS_HOST, 87 | port=settings.REDIS_PORT, password=settings.REDIS_PASSWORD) 88 | self.queue.serializer = json 89 | self.db = get_connection() 90 | self.search_db = get_search_connection() 91 | self.item_collection = ItemCollection(self.search_db) 92 | self.pipeline = set_pipeline_steps(pipeline_steps, item_collection=self.item_collection) 93 | 94 | 95 | def work(self, item): 96 | """ Feed jobs from the queue into the pipeline """ 97 | try: 98 | data = json.loads(item) 99 | logger.info('Processing task: '+data['id']) 100 | process(source(self.item_collection, data['id']), self.pipeline) 101 | except Exception, e: 102 | import traceback 103 | logger.error("Problem! " + str(e)) 104 | logger.error(traceback.format_exc()) 105 | #data = json.loads(item) 106 | #process(source(self.db, data['id']), PIPELINE) 107 | 108 | 109 | def start(self): 110 | """ Listen to the channels we've subscribed to and pass retrieved items 111 | to the worker 112 | 113 | """ 114 | logger.warn("Starting grimlock") 115 | while True: 116 | try: 117 | item = self.queue.pop() 118 | if item: 119 | self.work(item) 120 | time.sleep(1) 121 | except KeyboardInterrupt: 122 | logger.warn("Exiting grimlock") 123 | sys.exit() 124 | 125 | 126 | 127 | def run_for_set(item_collection, start_date=None, end_date=None): 128 | if not start_date: 129 | raise Exception("run_for_set start_date is required") 130 | 131 | pipeline = set_pipeline_steps(default_tasks, item_collection=item_collection) 132 | 133 | # No need to fail gracefully here. 
If the format is wrong go ahead and crash 134 | start = datetime.strptime(start_date, "%Y-%m-%d") 135 | search_val = start 136 | search_op = '>' 137 | end = None 138 | 139 | if end_date: 140 | end = datetime.strptime(end_date, "%Y-%m-%d") 141 | search_val = [start, end] 142 | search_op = 'between' 143 | 144 | search_params = [ 145 | { 146 | 'field':'updatedAt', 147 | 'value': search_val, 148 | 'op': search_op 149 | } 150 | ] 151 | 152 | print search_params 153 | 154 | def run(offset=0): 155 | docs = item_collection.find(search_params, offset=offset) 156 | for doc in docs['docs']: 157 | #pass 158 | process(lambda: doc, pipeline) 159 | 160 | offset += len(docs['docs']) 161 | #print "Running for " + str(docs['total']) + " docs" 162 | if offset < docs['total']: 163 | run(offset=offset) 164 | else: 165 | print 'done' 166 | 167 | run() 168 | 169 | 170 | def run_for_single(item_collection, doc_id): 171 | pipeline = set_pipeline_steps(item_collection=item_collection) 172 | process(source(item_collection, doc_id), pipeline) 173 | 174 | 175 | if __name__ == "__main__": 176 | app = App("transform") 177 | args = sys.argv 178 | 179 | if len(args) > 1 and args[1] == '--fordates': 180 | 181 | if len(args) == 3: 182 | logger.info("Running with one arg: " + args[2]) 183 | run_for_set(app.item_collection, start_date=args[2]) 184 | 185 | elif len(args) == 4: 186 | logger.info("Running with two args: " + args[2] + ", " + args[3]) 187 | run_for_set(app.item_collection, start_date=args[2], end_date=args[3]) 188 | 189 | elif len(args) > 1 and args[1] == '--fordoc': 190 | run_for_single(app.item_collection, args[2]) 191 | 192 | else: 193 | logger.info("Starting grimlock") 194 | app.start() 195 | 196 | -------------------------------------------------------------------------------- /src/cn_search_py/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/src/cn_search_py/__init__.py -------------------------------------------------------------------------------- /src/cn_search_py/collections.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from elasticsearch import Elasticsearch 3 | from .models import Item 4 | from .exceptions import DoesNotExist, MultipleObjectsReturned 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | class Collection(object): 9 | def make_model(self, data={}): 10 | return self.model(data, self) 11 | 12 | 13 | def _build_params(self, params): 14 | _params = [] 15 | 16 | for param in params: 17 | if 'op' not in param: 18 | param['op'] = '=' 19 | 20 | if param['op'] is '=': 21 | obj = {} 22 | obj[param['field']] = param['value'] 23 | 24 | _params.append({ 'term': obj }) 25 | 26 | elif param['op'] is 'between': 27 | obj = {} 28 | 29 | obj[param['field']] = { 30 | 'lte': param['value'][1], 31 | 'gte': param['value'][0] 32 | } 33 | 34 | _params.append({ 'range': obj }) 35 | 36 | elif '>' in param['op'] or '<' in param['op']: 37 | obj = {} 38 | 39 | keys = { 40 | '>': 'gt', 41 | '<': 'lt', 42 | '>=': 'gte', 43 | '<=': 'lte' 44 | } 45 | 46 | key = keys[param['op']] 47 | 48 | obj[param['field']] = {} 49 | obj[param['field']][key] = param['value'] 50 | 51 | _params.append({ 'range': obj }) 52 | 53 | 54 | if len(_params) is 0: 55 | return _params[0] 56 | 57 | 58 | return { 59 | "and": _params 60 | } 61 | 62 | 63 | def _search(self, params, limit=100, offset=0): 64 | body = { 65 | "query": { 66 | "filtered" : { 67 | "filter" : self._build_params(params) 68 | } 69 | } 70 | } 71 | 72 | kwargs = { 73 | 'index': self.index, 74 | 'doc_type': self.doc_type, 75 | 'body': body, 76 | 'size': limit 77 | } 78 | 79 | res = self.conn.search(**kwargs) 80 | 81 | return res 82 | 83 | 84 | def get(self, params): 85 | res = self._search(params) 86 | 87 | if 
res['hits']['total'] == 1: 88 | doc = res['hits']['hits'][0]['_source'] 89 | doc['id'] = res['hits']['hits'][0]['_id'] 90 | 91 | return doc 92 | 93 | if res['hits']['total'] == 0: 94 | raise DoesNotExist( 95 | "%s matching query does not exist. " 96 | "Lookup parameters were %s" % 97 | (self.model.__name__, params)) 98 | 99 | raise MultipleObjectsReturned( 100 | "get() returned more than one %s -- it returned %s! " 101 | "Lookup parameters were %s" % 102 | (self.model.__name__, res['hits']['total'], params)) 103 | 104 | 105 | def find(self, params, limit=100, offset=0): 106 | res = self._search(params) 107 | docs = [] 108 | if res['hits']['total'] > 0: 109 | for hit in res['hits']['hits']: 110 | doc = hit['_source'] 111 | doc['id'] = hit['_id'] 112 | docs.append(doc) 113 | 114 | return { 115 | 'total': res['hits']['total'], 116 | 'docs': docs 117 | } 118 | 119 | 120 | class ItemCollection(Collection): 121 | model = Item 122 | doc_type = 'item-type' 123 | index = 'item_alias' 124 | mapping = { 125 | 'properties': { 126 | 'geo': { 127 | 'properties': { 128 | 'coords': { 129 | 'type': 'geo_point' 130 | } 131 | } 132 | }, 133 | 'remoteID': { 134 | "type" : "string", 135 | "index" : "not_analyzed" 136 | }, 137 | 'tags': { 138 | 'properties': { 139 | 'name': { 140 | 'type': 'string', 141 | 'index': 'not_analyzed' 142 | } 143 | } 144 | } 145 | } 146 | } 147 | 148 | 149 | def __init__(self, conn, index=None): 150 | self.conn = conn 151 | if index: 152 | self.index = index 153 | 154 | -------------------------------------------------------------------------------- /src/cn_search_py/connect.py: -------------------------------------------------------------------------------- 1 | from config import settings 2 | from elasticsearch import Elasticsearch 3 | from .collections import ItemCollection 4 | 5 | 6 | def get_connection(): 7 | return Elasticsearch(host=settings.ES_HOST, port=settings.ES_PORT, 8 | http_auth=settings.ES_AUTH) 9 | 10 | def setup_indexes(conn): 11 | 
conn.indices.create(ItemCollection.index, ignore=400) 12 | conn.indices.put_mapping(doc_type=ItemCollection.doc_type, 13 | body=ItemCollection.mapping, index=ItemCollection.index) 14 | 15 | -------------------------------------------------------------------------------- /src/cn_search_py/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/src/cn_search_py/data/__init__.py -------------------------------------------------------------------------------- /src/cn_search_py/exceptions.py: -------------------------------------------------------------------------------- 1 | class DoesNotExist(Exception): 2 | pass 3 | 4 | 5 | class MultipleObjectsReturned(Exception): 6 | pass -------------------------------------------------------------------------------- /src/cn_search_py/models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | from .exceptions import DoesNotExist, MultipleObjectsReturned 4 | from .data import language_codes 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | class Model(object): 9 | def __init__(self, data, collection): 10 | self._collection = collection 11 | self.conn = collection.conn 12 | self.data = self.format_data(self.set_defaults(data)) 13 | self.doc_type = self._collection.doc_type 14 | self.index = self._collection.index 15 | 16 | 17 | def _index(self, **kwargs): 18 | if 'force_new' in kwargs or 'id' not in kwargs: 19 | kwargs['op_type'] = 'create' 20 | del kwargs['force_new'] 21 | 22 | kwargs['index'] = self.index 23 | kwargs['doc_type'] = self.doc_type 24 | 25 | return self.conn.index(**kwargs) 26 | 27 | 28 | def save(self, upsert_params = [], force_new=False, refresh=False): 29 | if force_new: 30 | return self._index(body=self.data, force_new=True, refresh=refresh) 31 | else: 32 | return 
self.upsert(params=upsert_params, refresh=refresh) 33 | 34 | 35 | def upsert(self, params = [], refresh=False): 36 | if params: 37 | logger.info("Have params") 38 | try: 39 | doc = self._collection.get(params) 40 | return self._index(body=self.data, id=doc['id'], 41 | refresh=refresh) 42 | except DoesNotExist: 43 | logger.info("Did not find doc for " + str(params)) 44 | return self._index(body=self.data, force_new=True, 45 | refresh=refresh) 46 | except MultipleObjectsReturned: 47 | logger.info("Got more than one object for "+str(params)) 48 | return None 49 | 50 | else: 51 | logger.info("Upsert called without params") 52 | return self._index(body=self.data, force_new=True, 53 | refresh=refresh) 54 | 55 | 56 | def format_data(self, data): 57 | return data 58 | 59 | def set_defaults(self, data): 60 | return data 61 | 62 | 63 | class Item(Model): 64 | def set_defaults(self, data): 65 | defaults = { 66 | 'license': 'unknown', 67 | 'lifespan': 'temporary', 68 | 'createdAt': datetime.now() 69 | } 70 | 71 | for key,val in defaults.iteritems(): 72 | if key not in data: 73 | data[key] = val 74 | 75 | return data 76 | 77 | 78 | def save(self, refresh=False): 79 | upsert_params = [ 80 | { 81 | 'field':'remoteID', 82 | 'value': self.data['remoteID'] 83 | }, 84 | { 85 | 'field': 'source', 86 | 'value': self.data['source'] 87 | } 88 | ] 89 | 90 | return super(Item, self).save(upsert_params=upsert_params, 91 | refresh=refresh) 92 | 93 | 94 | def format_data(self, data): 95 | data['updatedAt'] = datetime.now() 96 | 97 | if 'publishedAt' not in data: 98 | data['publishedAt'] = data['createdAt'] 99 | 100 | if 'language' in data and 'code' in data['language']: 101 | if data['language']['code'] in language_codes.codes: 102 | data['language'] = language_codes[data['language']['code']] 103 | 104 | 105 | def unique_tags(tags): 106 | utags = [] 107 | utags_names = [] 108 | 109 | for tag in tags: 110 | if tag['name'] not in utags_names: 111 | utags.append(tag) 112 | 
utags_names.append(tag['name']) 113 | 114 | return utags 115 | 116 | 117 | if 'tags' in data: 118 | data['tags'] = unique_tags(data['tags']) 119 | 120 | 121 | if 'tags' in data and len(data['tags']) > 10: 122 | data['tags'] = data['tags'][:10] 123 | 124 | if 'entities' in data: 125 | data['entities'] = [entity for entity in data['entities'] if entity] 126 | 127 | 128 | data['searchText'] = '' 129 | 130 | if 'content' in data: 131 | data['searchText'] = data['content'] 132 | 133 | if 'summary' in data: 134 | data['searchText'] += ' ' + data['summary'] 135 | 136 | if 'contentEnglish' in data: 137 | data['searchText'] += ' ' + data['contentEnglish'] 138 | 139 | if 'entities' in data: 140 | data['searchText'] += ' ' + ' '.join(data['entities']) 141 | 142 | if 'tags' in data: 143 | data['searchText'] += ' ' + ' '.join([tag['name'] for tag in data['tags']]) 144 | 145 | #data['searchText'] = data['searchText'] 146 | 147 | return data 148 | -------------------------------------------------------------------------------- /src/cn_store_py/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ushahidi/grimlock/c728b9d1beb0f1b9a2bec69abf93fe041cd3ba99/src/cn_store_py/__init__.py -------------------------------------------------------------------------------- /src/cn_store_py/connect.py: -------------------------------------------------------------------------------- 1 | from mongokit import Connection 2 | from .models import Item 3 | from config import settings 4 | 5 | 6 | def get_connection(): 7 | Item.__database__ = settings.MONGO_DB 8 | 9 | connection = Connection(host=settings.MONGO_HOST) 10 | connection.register([Item]) 11 | 12 | return connection -------------------------------------------------------------------------------- /src/cn_store_py/models.py: -------------------------------------------------------------------------------- 1 | from mongokit import Document, INDEX_GEO2D 2 | #from pymongo 
# (continuation of the commented-out pymongo import above)
import datetime


class Item(Document):
    """mongokit schema for crisis items stored in the 'items' collection.

    use_schemaless=True, so extra fields beyond this structure are allowed;
    the structure documents the expected shape rather than enforcing it.
    """
    __collection__ = 'items'
    use_schemaless = True
    structure = {
        'createdAt': datetime.datetime,
        # BUG FIX: this key was misspelled 'udpatedAt'; the pipeline writes
        # 'updatedAt', so the schema never described the field actually used.
        'updatedAt': datetime.datetime,
        'remoteID': basestring,
        'activeUntil': datetime.datetime,
        'lifespan': basestring,
        'content': basestring,
        'summary': basestring,
        'image': basestring,
        'geo': {
            'addressComponents': {
                'formattedAddress': basestring,
                'streetNumber': basestring,
                'streetName': basestring,
                'streetAddress': basestring,
                'neighborhood': basestring,
                'adminArea5': basestring,  # city
                'adminArea4': basestring,  # county
                'adminArea3': basestring,  # state
                'adminArea2': basestring,  # region
                'adminArea1': basestring,  # country
                'postalCode': basestring
            },
            'mentionedPlaces': [basestring],
            # lng, lat
            'coords': None,
            'accuracy': int,
            'granularity': basestring,
            'locationIdentifiers': {
                'authorLocationName': basestring,
                'authorTimeZone': basestring
            }
        },
        'tags': [{
            'name': basestring,
            'confidence': int
        }],
        'language': {
            'code': basestring,
            'name': basestring,
            'nativeName': basestring
        },
        'source': basestring,
        'license': basestring
    }
    required_fields = ['remoteID', 'lifespan']
    indexes = [
        {
            'fields': [('geo.coords', INDEX_GEO2D)],
        }
    ]

    default_values = {
        'license': 'unknown',
        'lifespan': 'temporary',
        # callable default: evaluated per-document at creation time
        'createdAt': datetime.datetime.utcnow
    }
# --- src/config/settings.py ---
import os
import importlib
import sys
import logging

logger = logging.getLogger(__name__)

QUEUE_NAME = 'transform'

environ = os.environ.get('GRIMLOCK')
# Set environment specific settings.
if environ:
    _this_module = sys.modules[__name__]
    try:
        _m = importlib.import_module('config.%s_settings' % environ)
    except ImportError:
        # NOTE(review): a misspelled GRIMLOCK value is silently ignored;
        # consider at least logging it.
        pass
    else:
        print("Using GRIMLOCK=%s" % environ)
        # copy every attribute of the environment module onto this module
        for _k in dir(_m):
            setattr(_this_module, _k, getattr(_m, _k))
# Dev is the default environment.
else:
    try:
        from development_settings import *
        # BUG FIX: this branch previously logged "GRIMLOCK=%s" % environ,
        # which printed "GRIMLOCK=None" in the default case.
        logger.info("Using GRIMLOCK=development (default)")
    except ImportError:
        pass


# --- src/pipeline.py ---
import functools


def compose(*functions):
    """ As defined by our good friends at UnderscoreJS: Returns the composition of a
    list of functions, where each function consumes the return value of the
    function that follows. In math terms, composing the functions f(), g(), and h()
    produces f(g(h())).

    One-liner borrowed from `Mathieu Larose `_

    The important takeaway is that each function must accept the return value
    of the previous function.

    >>> def func1(): return {'a': 'b'}
    >>> def func2(returnValueFromFunc1): return {'what': 'ever'}

    :param functions: As many functions as you want.

    """
    return functools.reduce(lambda f, g: lambda x: f(g(x)), functions)


def process(source, tasks):
    """ Compose a single function from the passed tasks, and feed the first function
    in that chain the return value from source.

    So...

    >>> task2(task1(source()))

    We're reversing the list of tasks so the consumer can pass in a list of
    functions in preferred execution order.

    :param source: The function that will return the initial data for the pipeline
    :param tasks: A list of functions that will be called in sequence
    :returns: the value produced by the final task

    """
    # BUG FIX: the pipeline's final value was computed and discarded;
    # return it so callers can use the processed result.
    return compose(*tasks[::-1])(source())


# --- src/tasks/add_default_values.py ---
defaults = {
    'content': 'No content available'
}


def run(data):
    """Fill in default values for keys that are missing, empty, or None."""
    for key, val in defaults.items():
        if key not in data or data[key] == '' or data[key] is None:
            data[key] = val

    return data


# --- src/tasks/data/word_tag_map.py (head) ---
# -*- coding: utf-8 -*-
import json

keywords = [
    {
        'word': 'storming',
        'tags': ['armed-conflict', 'conflict']
    },
    {
        'word': 'militia',
        'tags': ['armed-conflict', 'conflict']
    },
    {
        'word': 'army',
        'tags': ['armed-conflict', 'conflict']
    },
    {
        'word': 'kill',
        'tags': ['armed-conflict', 'conflict', 'death']
    },
]  # NOTE(review): the remaining keyword entries continue in the next chunk
'death'] 20 | }, 21 | { 22 | 'word': 'killing', 23 | 'tags': ['armed-conflict', 'conflict', 'death'] 24 | }, 25 | { 26 | 'word': 'clashes', 27 | 'tags': ['armed-conflict', 'conflict'] 28 | }, 29 | { 30 | 'word': 'factions', 31 | 'tags': ['armed-conflict', 'conflict'] 32 | }, 33 | { 34 | 'word': 'liberation', 35 | 'tags': ['armed-conflict', 'conflict'] 36 | }, 37 | { 38 | 'word': 'spoils', 39 | 'tags': ['armed-conflict', 'conflict'] 40 | }, 41 | { 42 | 'word': 'targeting', 43 | 'tags': ['armed-conflict', 'conflict'] 44 | }, 45 | { 46 | 'word': 'barrels', 47 | 'tags': ['armed-conflict', 'air-combat', 'conflict'] 48 | }, 49 | { 50 | 'word': 'clashes', 51 | 'tags': ['armed-conflict', 'conflict'] 52 | }, 53 | { 54 | 'word': 'aviation', 55 | 'tags': ['armed-conflict', 'air-combat', 'conflict'] 56 | }, 57 | { 58 | 'word': 'helicopter', 59 | 'tags': ['armed-conflict', 'air-combat', 'conflict'] 60 | }, 61 | { 62 | 'word': 'liberated', 63 | 'tags': ['armed-conflict', 'conflict'] 64 | }, 65 | { 66 | 'word': 'drums', 67 | 'tags': ['air-combat', 'armed-conflict', 'conflict'] 68 | }, 69 | { 70 | 'word': 'shelling', 71 | 'tags': ['armed-conflict', 'conflict'] 72 | }, 73 | { 74 | 'word': 'mig', 75 | 'tags': ['air-combat', 'armed-conflict', 'conflict'] 76 | }, 77 | { 78 | 'word': 'launch a missle', 79 | 'tags': ['air-combat', 'armed-conflict', 'conflict'] 80 | }, 81 | { 82 | 'word': 'aircraft', 83 | 'tags': ['air-combat', 'armed-conflict', 'conflict'] 84 | }, 85 | { 86 | 'word': 'moment of detonation', 87 | 'tags': ['armed-conflict', 'conflict'] 88 | }, 89 | { 90 | 'word': 'explosion', 91 | 'tags': ['armed-conflict', 'conflict'] 92 | }, 93 | { 94 | 'word': 'field gun', 95 | 'tags': ['armed-conflict', 'conflict'] 96 | }, 97 | { 98 | 'word': 'at-3 atgm', 99 | 'tags': ['armed-conflict', 'conflict'] 100 | }, 101 | { 102 | 'word': 'hell cannon', 103 | 'tags': ['armed-conflict', 'conflict'] 104 | }, 105 | { 106 | 'word': 'artillery', 107 | 'tags': ['armed-conflict', 'conflict'] 108 | }, 
109 | { 110 | 'word': 'rocket', 111 | 'tags': ['armed-conflict', 'conflict'] 112 | }, 113 | { 114 | 'word': 'missle', 115 | 'tags': ['armed-conflict', 'conflict'] 116 | }, 117 | { 118 | 'word': 'cluster', 119 | 'tags': ['armed-conflict', 'conflict'] 120 | }, 121 | { 122 | 'word': 'at-5 konkurs atgm', 123 | 'tags': ['armed-conflict', 'conflict'] 124 | }, 125 | { 126 | 'word': 'chlorine', 127 | 'tags': ['armed-conflict', 'conflict', 'chemical-warfare'] 128 | }, 129 | { 130 | 'word': 'gassed', 131 | 'tags': ['armed-conflict', 'conflict', 'chemical-warfare'] 132 | }, 133 | { 134 | 'word': 'bm-21 grad', 135 | 'tags': ['armed-conflict', 'conflict'] 136 | }, 137 | { 138 | 'word': 'at-13 metis', 139 | 'tags': ['armed-conflict', 'conflict'] 140 | }, 141 | { 142 | 'word': 'manpads', 143 | 'tags': ['armed-conflict', 'conflict'] 144 | }, 145 | { 146 | 'word': 'violent clashes', 147 | 'tags': ['armed-conflict', 'conflict'] 148 | }, 149 | { 150 | 'word': 'catapult', 151 | 'tags': ['armed-conflict', 'conflict'] 152 | }, 153 | { 154 | 'word': 'chemical', 155 | 'tags': ['armed-conflict', 'conflict', 'chemical-warfare'] 156 | } 157 | ] -------------------------------------------------------------------------------- /src/tasks/donation_classifier.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import logging 4 | 5 | logger = logging.getLogger('grimlock') 6 | 7 | def run(data): 8 | if data['source'] != 'twitter': 9 | return data 10 | 11 | if 'tags' not in data: 12 | data['tags'] = [] 13 | 14 | 15 | url = 'http://knoesis-twit.cs.wright.edu/CrisisComputingAPI/classifyController' 16 | params = { 17 | 'text': data['content'], 18 | "classifyType": "all", 19 | "platform": "twitter", 20 | "timestamp": "2014-05-26 09:00:00", 21 | "location": "Dayton,OH", 22 | "latitude": 39.796931, 23 | "longitude": -84.27961, 24 | "msgId": "123456789" 25 | } 26 | headers = {'content-type': 'application/json'} 27 | 28 | r = 
requests.post(url, data=json.dumps(params), headers=headers) 29 | if r.status_code == 200: 30 | resp_json = r.json() 31 | 32 | if resp_json['donation_classification_probb'] > .5: 33 | data['tags'].append({ 34 | 'name': 'donation', 35 | 'confidence': resp_json['donation_classification_probb'] 36 | }) 37 | 38 | if resp_json["request_classification_probb"] > .2: 39 | data['tags'].append({ 40 | 'name': 'donation-request', 41 | 'confidence': resp_json['request_classification_probb'] 42 | }) 43 | 44 | if resp_json["offer_classification_probb"] > .2: 45 | data['tags'].append({ 46 | 'name': 'donation-offer', 47 | 'confidence': resp_json['offer_classification_probb'] 48 | }) 49 | else: 50 | logger.error("Donation classifier failed! " + str(r.status_code) + " ... " + r.text) 51 | 52 | 53 | return data -------------------------------------------------------------------------------- /src/tasks/extract_content.py: -------------------------------------------------------------------------------- 1 | from newspaper import Article 2 | 3 | def run(data): 4 | character_limit = 400 5 | 6 | if data['source'] in ['gdelt']: 7 | if 'content' in data and len(data['content']) < 100: 8 | if 'fromURL' not in data: 9 | return data 10 | try: 11 | a = Article(data['fromURL']) 12 | a.download() 13 | a.parse() 14 | except: 15 | return data 16 | 17 | if len(a.text) > character_limit: 18 | text = a.text[:character_limit] + '...' 
19 | else: 20 | text = a.text 21 | 22 | data['content'] = text 23 | 24 | 25 | return data -------------------------------------------------------------------------------- /src/tasks/extract_place.py: -------------------------------------------------------------------------------- 1 | import geograpy 2 | 3 | def setup(**kwargs): 4 | #pc = geograpy.places.PlaceContext([]) 5 | #pc.populate_db() 6 | 7 | return run 8 | 9 | def run(data): 10 | if 'geo' in data and 'coords' in data['geo']: 11 | return data 12 | 13 | if 'fromURL' in data and data['source'] in ['gdelt']: 14 | kwargs = { 15 | 'url': data['fromURL'] 16 | } 17 | 18 | else: 19 | if 'contentEnglish' in data: 20 | field = 'contentEnglish' 21 | else: 22 | field = 'content' 23 | 24 | kwargs = { 25 | 'text': data[field] 26 | } 27 | 28 | try: 29 | pc = geograpy.get_place_context(**kwargs) 30 | except Exception, e: 31 | return data 32 | 33 | if 'entities' not in data: 34 | data['entities'] = [] 35 | 36 | for place in list(set(pc.places)): 37 | if place not in data['entities']: 38 | data['entities'].append(place) 39 | 40 | # starting from scratch with no location data 41 | if 'geo' not in data: 42 | data['geo'] = {} 43 | 44 | if 'addressComponents' not in data['geo']: 45 | data['geo']['addressComponents'] = {} 46 | if pc.countries: 47 | # _mentions are tuples ordered most > least, like 48 | # [(Name1, 5), (Name2, 4)]. We'll assume the most mentioned is the 49 | # most important. 
50 | 51 | # now that we have a country, we only want regions/cities from 52 | # within that country 53 | country = pc.country_mentions[0][0] 54 | data['geo']['addressComponents']['adminArea1'] = country 55 | 56 | data = region_city_for_country(data, country, pc) 57 | 58 | # without countries we're left to work our way back up the chain starting 59 | # with city information 60 | else: 61 | if pc.cities: 62 | data['geo']['addressComponents']['adminArea5'] = pc.city_mentions[0][0] 63 | # now that we know we have a city, try to work backwards from 64 | # here to get the country and region. 65 | build_from_city(data, pc) 66 | 67 | # we must already have some information about this place's location. use 68 | # that in conjunction with the places returned by geograpy to fill in the 69 | # gaps. 70 | else: 71 | # top down from country 72 | if 'adminArea1' in data['geo']['addressComponents']: 73 | country = pc.correct_country_mispelling(data['geo']['addressComponents']['adminArea1']) 74 | 75 | # now that we have a a country, get regions cities that we might 76 | # know about, assuming they are within that country 77 | data = region_city_for_country(data, country, pc) 78 | 79 | 80 | # try it with a city 81 | elif 'adminArea5' in data['geo'] and not pc.countries: 82 | # now that we know we have a city, try to work backwards from 83 | # here to get the country and region. 84 | build_from_city(data, pc) 85 | 86 | # take your best shot at guessing the country and then working down from 87 | # there. 88 | else: 89 | data = set_missing_country_region(data, pc) 90 | if 'adminArea1' in data['geo']['addressComponents']: 91 | country = data['geo']['addressComponents']['adminArea1'] 92 | data = region_city_for_country(data, country, pc) 93 | 94 | # We couldn't do anything with found country names, maybe we at least 95 | # have a city mention. 
96 | elif pc.cities: 97 | data['geo']['addressComponents']['adminArea5'] = pc.city_mentions[0][0] 98 | # now that we know we have a city, try to work backwards from 99 | # here to get the country and region. 100 | build_from_city(data, pc) 101 | 102 | return data 103 | 104 | 105 | 106 | def most_mentioned(place_names, place_mentions): 107 | for place in place_mentions: 108 | if place[0] in place_names: 109 | return place[0] 110 | 111 | 112 | def mentioned_in_dict(place_name, country_dict): 113 | for key, val in country_dict.iteritems(): 114 | if place_name in val: 115 | return key 116 | 117 | return None 118 | 119 | 120 | def region_city_for_country(data, country, pc): 121 | # now that we country, only get regions and cities in that country 122 | if country in pc.country_regions and 'adminArea3' not in data['geo']['addressComponents']: 123 | data['geo']['addressComponents']['adminArea3'] = most_mentioned( 124 | pc.regions, pc.region_mentions) 125 | 126 | if country in pc.country_cities and 'adminArea5' not in data['geo']['addressComponents']: 127 | data['geo']['addressComponents']['adminArea5'] = most_mentioned( 128 | pc.cities, pc.city_mentions) 129 | 130 | return data 131 | 132 | 133 | def set_missing_country_region(data, pc): 134 | if 'adminArea1' not in data['geo']['addressComponents'] and pc.countries: 135 | data['geo']['addressComponents']['adminArea1'] = pc.country_mentions[0][0] 136 | 137 | if 'adminArea3' not in data['geo']['addressComponents'] and pc.regions: 138 | data['geo']['addressComponents']['adminArea3'] = pc.region_mentions[0][0] 139 | 140 | return data 141 | 142 | 143 | def build_from_city(data, pc): 144 | region_name = None 145 | 146 | components = data['geo']['addressComponents'] 147 | if 'adminArea3' in components: 148 | region_name = components['adminArea3'] 149 | elif 'adminArea2' in components: 150 | region_name = components['adminArea2'] 151 | 152 | # get all cities matching this name 153 | possible_cities = 
geograpy.places.cities_for_name(data['geo']['addressComponents']['adminArea5']) 154 | actual_city = None 155 | 156 | # try to validate that of the cities returned, we have found one in 157 | # a region that we're already aware of. 158 | if possible_cities: 159 | if region_name: 160 | for city in possible_cities: 161 | if city[6] == region_name: 162 | actual_city = city 163 | 164 | # assuming we weren't able to match this city with a region we already 165 | # knew about. 166 | if not actual_city and len(possible_cities) == 1: 167 | actual_city = possible_cities[0] 168 | else: 169 | return set_missing_country_region(data, pc) 170 | 171 | # use city, country and region name from city record 172 | if actual_city: 173 | data['geo']['addressComponents']['adminArea5'] = actual_city[7] 174 | data['geo']['addressComponents']['adminArea1'] = actual_city[4] 175 | 176 | if 'adminArea3' not in data['geo']['addressComponents']: 177 | data['geo']['addressComponents']['adminArea3'] = actual_city[6] 178 | 179 | # oook. 
we're not very sure about the city, just set country/region 180 | else: 181 | data = set_missing_country_region(data, pc) 182 | 183 | 184 | return data 185 | -------------------------------------------------------------------------------- /src/tasks/format_address.py: -------------------------------------------------------------------------------- 1 | def run(data): 2 | address = '' 3 | 4 | if 'geo' not in data: 5 | return data 6 | 7 | component_keys = [ 8 | 'neighborhood', 9 | 'adminArea5', 10 | 'adminArea4', 11 | 'adminArea3', 12 | 'adminArea2', 13 | 'adminArea1' 14 | ] 15 | 16 | has_address = False 17 | if 'addressComponents' in data['geo']: 18 | for key in component_keys: 19 | if key in data['geo']['addressComponents']: 20 | has_address = True 21 | 22 | 23 | if has_address: 24 | address = build_address_from_components(data) 25 | elif 'locationIdentifiers' in data['geo']: 26 | address = build_address_from_identifiers(data) 27 | 28 | if address and len(address) > 0: 29 | if 'addressComponents' not in data['geo']: 30 | data['geo']['addressComponents'] = {} 31 | 32 | data['geo']['addressComponents']['formattedAddress'] = address 33 | 34 | 35 | if 'entities' not in data: 36 | data['entities'] = [] 37 | 38 | if 'addressComponents' in data['geo']: 39 | for key in component_keys: 40 | if key in data['geo']['addressComponents'] and data['geo']['addressComponents'][key] not in data['entities']: 41 | data['entities'].append(data['geo']['addressComponents'][key]) 42 | 43 | 44 | return data 45 | 46 | 47 | def add_if_exists(obj, key): 48 | if key in obj and obj[key]: 49 | return obj[key] + ',' 50 | 51 | return '' 52 | 53 | 54 | def build_address_from_components(data): 55 | components = data['geo']['addressComponents'] 56 | 57 | if 'formattedAddress' in components and components['formattedAddress'] is not None and len(components['formattedAddress']) > 0: 58 | return components['formattedAddress'] 59 | 60 | address = "" 61 | component_keys = [ 62 | 'streetNumber', 63 | 
'streetName', 64 | 'neighborhood', 65 | 'adminArea5', 66 | 'adminArea4', 67 | 'adminArea3', 68 | 'adminArea2', 69 | 'adminArea1' 70 | ] 71 | 72 | for key in component_keys: 73 | address += add_if_exists(components, key) 74 | 75 | address = address[:-1] 76 | 77 | return address 78 | 79 | 80 | def build_address_from_identifiers(data): 81 | """ 82 | for key in ['authorLocationName', 'authorTimeZone']: 83 | if key in data['geo']['locationIdentifiers']: 84 | return data['geo']['locationIdentifiers'][key].strip() 85 | 86 | return '' 87 | """ 88 | loc_id = data['geo']['locationIdentifiers'] 89 | if 'authorTimeZone' in loc_id and loc_id['authorTimeZone']: 90 | return data['geo']['locationIdentifiers']['authorTimeZone'] 91 | 92 | elif 'authorLocationName' in loc_id and loc_id['authorLocationName']: 93 | return data['geo']['locationIdentifiers']['authorLocationName'] 94 | 95 | else: 96 | return '' -------------------------------------------------------------------------------- /src/tasks/geocode.py: -------------------------------------------------------------------------------- 1 | """ This is just an example. Inside the run method of a task you can do whatever 2 | you want. The only requirements are that your run method accepts a single 3 | argument - the structure to be transformed or augmented - and returns the 4 | transformed/augmented structure so that downstream tasks can make further 5 | modifications or update the document in the datastore. 
6 | 7 | """ 8 | 9 | import requests 10 | import json 11 | import logging 12 | from config import settings 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | def run(data): 17 | # We can only geocode if we have an address 18 | if 'geo' not in data or 'addressComponents' not in data['geo'] or 'formattedAddress' not in data['geo']['addressComponents']: 19 | return data 20 | 21 | # Assumption is that provided coords are accurate enough (if they exist) 22 | if 'coords' in data['geo']: 23 | return data 24 | 25 | url = 'http://dev.virtualearth.net/REST/v1/Locations/%s?key=%s' % ( 26 | data['geo']['addressComponents']['formattedAddress'], settings.MAP_KEY) 27 | 28 | r = requests.get(url) 29 | 30 | if r.status_code != 200: 31 | logger.error("Geocode error " + str(r.status_code)) 32 | #print r.text 33 | return data 34 | 35 | try: 36 | json_data = r.json() 37 | except ValueError, e: 38 | logger.error("no json available from geocode") 39 | return data 40 | 41 | if len(json_data['resourceSets']) == 0: 42 | return data 43 | 44 | if len(json_data['resourceSets'][0]['resources']) == 0: 45 | return data 46 | 47 | coords = json_data['resourceSets'][0]['resources'][0]['point']['coordinates'] 48 | 49 | # API gives us lat,lng and we need lng,lat 50 | data['geo']['coords'] = coords[::-1] 51 | 52 | return data -------------------------------------------------------------------------------- /src/tasks/image_tagger.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import urllib 3 | from config import settings 4 | 5 | def run(data): 6 | if 'image' not in data: 7 | return data 8 | 9 | url = 'http://access.alchemyapi.com/calls/url/URLGetRankedImageKeywords' 10 | params = { 11 | 'apikey': settings.ALCHEMY_API_KEY, 12 | 'url': data['image'], 13 | 'imagePostMode': 'not-raw', 14 | 'outputMode': 'json' 15 | } 16 | 17 | r = requests.get(url, params=params) 18 | r_data = r.json() 19 | 20 | for tag in r_data['imageKeywords']: 21 | if 
tag['text'] == 'person' and float(tag['score']) > 0.6: 22 | if 'tags' not in data: 23 | data['tags'] = [] 24 | 25 | data['tags'].append({'name':'photo-person', 'confidence': float(tag['score'])}) 26 | 27 | return data -------------------------------------------------------------------------------- /src/tasks/relevance_classifier.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import requests 3 | from config import settings 4 | import csv 5 | import os 6 | import jellyfish 7 | from .data import word_tag_map 8 | 9 | def fuzzy_match(s1, s2, max_dist=.9): 10 | try: 11 | distance = jellyfish.jaro_distance(s1, s2) 12 | is_match = distance >= max_dist 13 | except: 14 | is_match = False 15 | distance = 0 16 | 17 | return is_match, distance 18 | 19 | 20 | def setup(**kwargs): 21 | def get_tags(offset=0, total_retrieved=0, tags=[]): 22 | api_url = settings.API_URL 23 | api_key = settings.API_KEY 24 | 25 | url = api_url + '/system-tag?limit=200' 26 | headers = {'Authorization': 'Bearer ' + api_key} 27 | 28 | url = url + '&offset=' + str(offset) 29 | r = requests.get(url, headers=headers) 30 | if r.status_code == 200: 31 | json_data = r.json() 32 | tag_data = json_data['data'] 33 | for tag in tag_data: 34 | tags.append(tag) 35 | 36 | total_retrieved += len(tag_data) 37 | if total_retrieved < json_data['total']: 38 | return get_tags(offset=total_retrieved, total_retrieved=total_retrieved, 39 | tags=tags) 40 | else: 41 | return tags 42 | else: 43 | return [] 44 | 45 | if 'get_tags' in kwargs: 46 | get_tags = kwargs['get_tags'] 47 | 48 | tags = get_tags() 49 | 50 | 51 | def has_tag(tag_name, doc_tags): 52 | names = [tag['name'] for tag in doc_tags] 53 | return tag_name in names 54 | 55 | def tokenized_tag(tag): 56 | tag_names = tag['name'].split() 57 | 58 | tokens = [] 59 | 60 | for tag_name in tag_names: 61 | tokens.append(tag_name) 62 | 63 | names = tag_name.split('-') 64 | for n in names: 65 | tokens.append(n) 66 | 67 
| return tokens 68 | 69 | 70 | def run(data): 71 | if len(tags) == 0: 72 | logger.warn('No tags available for relevance_classifier') 73 | return data 74 | 75 | #data['tags'] = [] 76 | if 'tags' in data: 77 | found_tags = [_ for _ in data['tags']] 78 | else: 79 | found_tags = [] 80 | 81 | 82 | def add_tag(tag): 83 | if len(found_tags) > 20: 84 | return 85 | 86 | found_tags.append({'name': tag['name'], 'confidence': 1}) 87 | if 'categories' in tag: 88 | for category in tag['categories']: 89 | if category in ['disaster','crisis'] and not has_tag(category, found_tags): 90 | found_tags.append({'name': category, 'confidence': 1}) 91 | 92 | 93 | if 'contentEnglish' in data: 94 | combined_text = data['contentEnglish'] 95 | elif 'searchText' in data: 96 | combined_text = data['searchText'] 97 | else: 98 | combined_text = data['content'] 99 | 100 | for keyword in word_tag_map.keywords: 101 | if keyword['word'] in combined_text: 102 | for tag in keyword['tags']: 103 | if not has_tag(tag, found_tags): 104 | add_tag({'name': tag}) 105 | 106 | 107 | for tag in tags: 108 | if tag['name'] == 'conflict' or tag['name'] == 'disaster': 109 | continue 110 | 111 | # Look for ' word ' 112 | if ' ' + tag['name'].replace('-', ' ').lower() + ' ' in combined_text and not has_tag(tag['name'], found_tags): 113 | add_tag(tag) 114 | 115 | # Look for ' word.' 116 | if ' ' + tag['name'].replace('-', ' ').lower() + '.' 
in combined_text and not has_tag(tag['name'], found_tags): 117 | add_tag(tag) 118 | 119 | 120 | if 'entities' in data: 121 | for entity in data['entities']: 122 | is_match, distance = fuzzy_match(entity.lower(), tag['name'].lower()) 123 | if is_match and not has_tag(tag['name'], found_tags): 124 | add_tag(tag) 125 | 126 | 127 | if len(found_tags) > 0: 128 | if 'tags' not in data: 129 | data['tags'] = found_tags 130 | else: 131 | for tag in found_tags: 132 | if not has_tag(tag['name'], data['tags']): 133 | data['tags'].append(tag) 134 | 135 | 136 | """ 137 | words = data[field].split() 138 | for tag in tags: 139 | if tag['name'] == 'conflict' or tag['name'] == 'disaster': 140 | continue 141 | tag_names = tokenized_tag(tag) 142 | 143 | for name in tag_names: 144 | for word in words: 145 | if len(word) <= 3: 146 | continue 147 | 148 | is_match, distance = fuzzy_match(name.lower(), word.lower()) 149 | 150 | if is_match and not has_tag(tag['name'], data['tags']): 151 | data['tags'].append({'name': tag['name'], 'confidence': distance}) 152 | for category in tag['categories']: 153 | if not has_tag(category, data['tags']): 154 | data['tags'].append({'name': category, 'confidence': 1}) 155 | """ 156 | 157 | return data 158 | 159 | return run 160 | -------------------------------------------------------------------------------- /src/tasks/reverse_geocode.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from config import settings 4 | 5 | 6 | def run(data): 7 | if 'geo' not in data or 'coords' not in data['geo'] or data['geo']['coords'] is None: 8 | return data 9 | 10 | include_entity_types = [ 11 | 'Address', 12 | 'Neighborhood', 13 | 'PopulatedPlace', 14 | 'Postcode1', 15 | 'AdminDivision1', 16 | 'AdminDivision2', 17 | 'CountryRegion' 18 | ] 19 | 20 | iet = ','.join(include_entity_types) 21 | latLng = ','.join([str(coord) for coord in data['geo']['coords'][::-1]]) 22 | 23 | url = 
'http://dev.virtualearth.net/REST/v1/Locations/%s?includeEntityTypes=%s&key=%s' % ( 24 | latLng, iet, settings.MAP_KEY) 25 | 26 | r = requests.get(url) 27 | 28 | if r.status_code != 200: 29 | logger.error("Geocode error " + str(r.status_code)) 30 | #print r.text 31 | return data 32 | 33 | try: 34 | json_data = r.json() 35 | except ValueError, e: 36 | logger.error("no json available from geocode") 37 | return data 38 | 39 | if len(json_data['resourceSets']) == 0: 40 | return data 41 | 42 | if len(json_data['resourceSets'][0]['resources']) == 0: 43 | return data 44 | 45 | address_data = json_data['resourceSets'][0]['resources'][0]['address'] 46 | 47 | model_address_data = {} 48 | 49 | prop_map = { 50 | 'addressLine': 'streetAddress', 51 | 'adminDistrict': 'adminArea3', 52 | 'adminDistrict2': 'adminArea4', 53 | 'countryRegion': 'adminArea1', 54 | 'locality': 'adminArea5', 55 | 'postalCode': 'postalCode', 56 | 'formattedAddress': 'formattedAddress' 57 | } 58 | 59 | def include_if_present(prop): 60 | if prop in address_data: 61 | model_address_data[prop_map[prop]] = address_data[prop] 62 | 63 | for key in prop_map: 64 | include_if_present(key) 65 | 66 | data['geo']['addressComponents'] = model_address_data 67 | 68 | return data -------------------------------------------------------------------------------- /src/tasks/translate_content.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import logging 4 | from config import settings 5 | from microsofttranslator import Translator 6 | import goslate 7 | 8 | 9 | def run(data): 10 | if 'language' in data and 'code' in data['language'] and data['language']['code'] == 'en': 11 | return data 12 | 13 | 14 | """ 15 | translator = Translator(settings.BING_APP_ID, settings.BING_APP_SECRET) 16 | 17 | try: 18 | data['contentEnglish'] = translator.translate(data['content'], "en") 19 | except Exception, e: 20 | print e 21 | pass 22 | """ 23 | 24 | try: 25 | gs = 
goslate.Goslate() 26 | data['contentEnglish'] = gs.translate(data['content'][:1000], 'en') 27 | except Exception, e: 28 | print e 29 | pass 30 | 31 | return data -------------------------------------------------------------------------------- /src/tasks/update_doc.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | def setup(**kwargs): 4 | def run(data): 5 | data['updatedAt'] = datetime.datetime.utcnow() 6 | 7 | if 'entities' not in data: 8 | data['entities'] = [] 9 | 10 | component_keys = [ 11 | 'neighborhood', 12 | 'adminArea5', 13 | 'adminArea4', 14 | 'adminArea3', 15 | 'adminArea2', 16 | 'adminArea1' 17 | ] 18 | 19 | if 'geo' in data and 'addressComponents' in data['geo']: 20 | for key in component_keys: 21 | if key in data['geo']['addressComponents'] and data['geo']['addressComponents'][key] not in data['entities']: 22 | data['entities'].append(data['geo']['addressComponents'][key]) 23 | 24 | if 'item_collection' not in kwargs: 25 | raise 'update_doc task requires item_collection kwarg' 26 | 27 | ic = kwargs['item_collection'] 28 | 29 | item = ic.make_model(data) 30 | item.save(refresh=True) 31 | 32 | return item 33 | 34 | return run -------------------------------------------------------------------------------- /tests/test_donation_classifier.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | from src.tasks import donation_classifier 3 | 4 | data = { 5 | 'source': 'twitter', 6 | 'content': "If you'd like to help with the #Sandy recovery, what's needed most is money, blood and volunteer labor: http://tnat.in/eTs77" 7 | } 8 | 9 | data = donation_classifier.run(data) 10 | tags = [tag['name'] for tag in data['tags']] 11 | 12 | assert 'donation' in tags 13 | assert 'donation-offer' in tags 14 | 15 | data = { 16 | 'source': 'twitter', 17 | 'content': "Heckuva Job Brownie criticizes Obama for responding to Hurricane Sandy so quickly. 
http://dlvr.it/2PgX64" 18 | } 19 | 20 | data = donation_classifier.run(data) 21 | 22 | assert 'donation' not in [tag['name'] for tag in data['tags']] 23 | -------------------------------------------------------------------------------- /tests/test_extract_place.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import logging 4 | requests_log = logging.getLogger("pycountry.db") 5 | requests_log.setLevel(logging.WARNING) 6 | from src.tasks import extract_place 7 | 8 | def test(): 9 | extract_place.setup() 10 | 11 | data = { 12 | "remoteID": "291506692", 13 | "content": "Expelordeportindividuals", 14 | "source": "gdelt", 15 | "fromURL": "http://www.theage.com.au/world/taliban-attackers-mistake-armed-contractors-for-christian-daycare-workers-20140330-zqolw.html", 16 | "summary": "Expelordeportindividuals", 17 | "_id": "533a28bec906a78c36984a35", 18 | "license": "unknown", 19 | "language": { 20 | "code": "en", 21 | "name": "English", 22 | "nativeName": "English" 23 | }, 24 | "tags": [ 25 | { 26 | "name": "conflict", 27 | "_id": "533a28bec906a78c36984a36", 28 | "confidence": 1 29 | } 30 | ], 31 | "geo": {}, 32 | "lifespan": "temporary", 33 | "createdAt": "2014-04-01T02: 47: 26.495Z", 34 | "__v": 0 35 | } 36 | 37 | data = extract_place.run(data) 38 | 39 | """ The article associated with data contains many references to Afghanistan 40 | and Kabul """ 41 | 42 | assert 'addressComponents' in data['geo'] 43 | 44 | assert data['geo']['addressComponents']['adminArea1'] == 'Afghanistan' 45 | assert data['geo']['addressComponents']['adminArea5'] == 'Kabul' 46 | 47 | """ Without a fromURL the extractor should use the content property """ 48 | del data['geo']['addressComponents'] 49 | del data['fromURL'] 50 | 51 | data['content'] = """ Perfect just Perfect! It's a perfect storm for Nairobi on a Friday evening! 
horrible traffic here is your cue to become worse @Ma3Route """ 52 | 53 | data = extract_place.run(data) 54 | 55 | assert data['geo']['addressComponents']['adminArea1'] == 'Kenya' 56 | assert data['geo']['addressComponents']['adminArea5'] == 'Nairobi' 57 | 58 | """ Checking other branches in the logic. In theory same to above. """ 59 | data['geo']['addressComponents'] = { 60 | 'adminArea1': 'Kenya' 61 | } 62 | 63 | data = extract_place.run(data) 64 | 65 | assert data['geo']['addressComponents']['adminArea5'] == 'Nairobi' 66 | 67 | data['geo']['addressComponents'] = { 68 | 'adminArea5': 'Nairobi' 69 | } 70 | 71 | data = extract_place.run(data) 72 | 73 | assert data['geo']['addressComponents']['adminArea1'] == 'Kenya' 74 | 75 | data['geo']['addressComponents'] = { 76 | 'formattedAddress': '' 77 | } 78 | 79 | data = extract_place.run(data) 80 | 81 | assert data['geo']['addressComponents']['adminArea1'] == 'Kenya' 82 | assert data['geo']['addressComponents']['adminArea5'] == 'Nairobi' 83 | 84 | data['content'] = "#مكتب_دمشق_الإعلامي | # Goobers | 10.5.2014 p for | Bombing was described as the deadliest targeting neighborhood Goobers from multiple sources since about the time amid violent clashes on the kafersoseh, he heard loud ambulance East of Damascus tanker dead and wounded troops.\nActivists said that several mortar shells landed in the area of the Abbasids along the lgobr." 85 | data['geo']['addressComponents'] = { 86 | 'adminArea1': 'Syria' 87 | } 88 | 89 | data = extract_place.run(data) 90 | 91 | assert 'Damascus' in data['entities'] 92 | assert data['geo']['addressComponents']['adminArea5'] == 'Damascus' 93 | 94 | data['geo']['addressComponents'] = { 95 | 'adminArea1': 'Syria' 96 | } 97 | 98 | data['content'] = "# Flash _ Syria | | # Aleppo | | # Hayyan: wounding three children and two women, some in critical condition, after warplanes targeting the city's missile interstitial." 
99 | data = extract_place.run(data) 100 | 101 | assert 'Aleppo' in data['entities'] 102 | assert data['geo']['addressComponents']['adminArea5'] == 'Aleppo' 103 | -------------------------------------------------------------------------------- /tests/test_format_address_task.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | from src.tasks import format_address 3 | import datetime 4 | 5 | data = {u'remoteID': u'132', u'language': {u'nativeName': u'English', u'code': u'en', u'name': u'English'}, u'license': u'unknown', u'tags': [u'death', u'accident', u'road', u'injury'], u'publishedAt': datetime.datetime(2011, 1, 5, 8, 0), u'summary': u'Traffic Accident: Pedestrian was knocked down by a matatu.', u'content': u'Traffic Accident: Pedestrian was knocked down by a matatu.', u'source': u'kenya-traffic-incidents-2011', u'__v': 0, u'lifespan': u'temporary', u'updatedAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 832000), u'_id': '530a1cf010a84e0000392a65', u'geo': {u'addressComponents': {u'adminArea1': u'Kenya', u'neighborhood': u'Yala', u'adminArea4': u'Siaya', u'adminArea5': u'Siaya'}}, u'createdAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 831000)} 6 | 7 | data = format_address.run(data) 8 | 9 | assert 'formattedAddress' in data['geo']['addressComponents'] 10 | assert data['geo']['addressComponents']['formattedAddress'] == 'Yala,Siaya,Siaya,Kenya' 11 | 12 | data2 = {u'remoteID': u'446712115228061696', u'language': {u'nativeName': u'English', u'code': u'en', u'name': u'English'}, u'license': u'unknown', u'tags': [], u'publishedAt': datetime.datetime(2014, 3, 20, 18, 17, 22), u'summary': u'What exactly has improved in Nairobi county ? Traffic lights? Water bill down? Security? Traffic...', u'content': u'What exactly has improved in Nairobi county ? Traffic lights? Water bill down? Security? Traffic jams? 
....not very convinced...', u'source': u'twitter', u'__v': 0, u'lifespan': u'temporary', u'updatedAt': datetime.datetime(2014, 3, 20, 18, 18, 19, 1000), u'_id': '532b30eb7e93ef0000d6596f', u'geo': {u'locationIdentifiers': {u'authorTimeZone': u'Nairobi', u'authorLocationName': u'Nairobi - Kenya '}}, u'createdAt': datetime.datetime(2014, 3, 20, 18, 18, 18, 999000)} 13 | 14 | data2 = format_address.run(data2) 15 | 16 | assert 'formattedAddress' in data2['geo']['addressComponents'] 17 | #assert data2['geo']['addressComponents']['formattedAddress'] == 'Nairobi - Kenya' 18 | 19 | data3 = { 20 | "remoteID": "291506692", 21 | "content": "Expelordeportindividuals", 22 | "source": "gdelt", 23 | "fromURL": "http: //www.theage.com.au/world/taliban-attackers-mistake-armed-contractors-for-christian-daycare-workers-20140330-zqolw.html", 24 | "summary": "Expelordeportindividuals", 25 | "_id": "533a28bec906a78c36984a35", 26 | "license": "unknown", 27 | "language": { 28 | "code": "en", 29 | "name": "English", 30 | "nativeName": "English" 31 | }, 32 | "tags": [ 33 | { 34 | "name": "conflict", 35 | "_id": "533a28bec906a78c36984a36", 36 | "confidence": 1 37 | } 38 | ], 39 | "geo": { 40 | "coords": [ 41 | 69.1833, 42 | 34.5167 43 | ], 44 | "addressComponents": { 45 | "formattedAddress": "Kabul, Kabol, Afghanistan" 46 | } 47 | }, 48 | "lifespan": "temporary", 49 | "createdAt": "2014-04-01T02: 47: 26.495Z", 50 | "__v": 0 51 | } 52 | 53 | data3 = format_address.run(data3) 54 | 55 | assert 'formattedAddress' in data3['geo']['addressComponents'] 56 | assert data3['geo']['addressComponents']['formattedAddress'] == "Kabul, Kabol, Afghanistan" 57 | 58 | -------------------------------------------------------------------------------- /tests/test_geocode_task.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | from src.tasks import geocode 3 | import datetime 4 | 5 | data = {u'remoteID': u'132', u'language': {u'nativeName': u'English', u'code': 
u'en', u'name': u'English'}, u'license': u'unknown', u'tags': [u'death', u'accident', u'road', u'injury'], u'publishedAt': datetime.datetime(2011, 1, 5, 8, 0), u'summary': u'Traffic Accident: Pedestrian was knocked down by a matatu.', u'content': u'Traffic Accident: Pedestrian was knocked down by a matatu.', u'source': u'kenya-traffic-incidents-2011', u'__v': 0, u'lifespan': u'temporary', u'updatedAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 832000), u'_id': '530a1cf010a84e0000392a65', u'geo': {u'addressComponents': {u'formattedAddress':'Yala,Siaya,Siaya,Kenya', u'adminArea1': u'Kenya', u'neighborhood': u'Yala', u'adminArea4': u'Siaya', u'adminArea5': u'Siaya'}}, u'createdAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 831000)} 6 | 7 | data = geocode.run(data) 8 | 9 | assert 'coords' in data['geo'] 10 | print data['geo']['coords'] 11 | assert len(data['geo']['coords']) == 2 -------------------------------------------------------------------------------- /tests/test_identify_language.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | from src.tasks import identify_language 3 | import datetime 4 | 5 | data = {u'remoteID': u'132', 'language': None, u'license': u'unknown', u'tags': [u'death', u'accident', u'road', u'injury'], u'publishedAt': datetime.datetime(2011, 1, 5, 8, 0), u'summary': u'Traffic Accident: Pedestrian was knocked down by a matatu.', u'content': u'Traffic Accident: Pedestrian was knocked down by a matatu.', u'source': u'kenya-traffic-incidents-2011', u'__v': 0, u'lifespan': u'temporary', u'updatedAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 832000), u'_id': '530a1cf010a84e0000392a65', u'geo': {u'addressComponents': {u'formattedAddress':'Yala,Siaya,Siaya,Kenya', u'adminArea1': u'Kenya', u'neighborhood': u'Yala', u'adminArea4': u'Siaya', u'adminArea5': u'Siaya'}}, u'createdAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 831000)} 6 | 7 | data = identify_language.run(data) 8 | 9 | assert 'language' 
in data 10 | assert data['language'] is not None 11 | assert data['language']['code'] == 'en' 12 | assert data['language']['name'] == 'English' -------------------------------------------------------------------------------- /tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['GRIMLOCK'] = 'test' 3 | import logging 4 | requests_log = logging.getLogger("pycountry.db") 5 | requests_log.setLevel(logging.WARNING) 6 | 7 | from src.tasks import (geocode, format_address, update_doc, identify_language, 8 | add_default_values, reverse_geocode, extract_place, translate_content, 9 | relevance_classifier, extract_content, donation_classifier, image_tagger) 10 | 11 | default_tasks = [ 12 | add_default_values, 13 | extract_content, 14 | identify_language, 15 | translate_content, 16 | extract_place, 17 | #relevance_classifier, 18 | image_tagger, 19 | donation_classifier, 20 | format_address, 21 | geocode, 22 | reverse_geocode, 23 | update_doc 24 | ] 25 | 26 | def test(): 27 | import json 28 | import uuid 29 | from src.app import App, source 30 | from cn_search_py.connect import (setup_indexes, 31 | get_connection as get_search_connection) 32 | from cn_search_py.collections import ItemCollection 33 | 34 | app = App("transform", pipeline_steps = default_tasks) 35 | 36 | random_id = str(uuid.uuid4()) 37 | 38 | data = { 39 | 'remoteID': random_id, 40 | 'content': "U.S. aerial intervention against ISIS could give the upper hand to Iraqi security forces on the ground. But air power alone won't decide the battle against the jihadist group, says Karl Mueller. http://on.rand.org/yc6jH", 41 | 'source': "facebook", 42 | 'image': 'https://fbcdn-photos-g-a.akamaihd.net/hphotos-ak-xap1/t1.0-0/10436089_642636699158835_6716614903784028712_s.jpg', 43 | 'fromURL': "http://www.theage.com.au/world/taliban-attackers-mistake-armed-contractors-for-christian-daycare-workers-20140330-zqolw.html", 44 | 'summary': "U.S. 
aerial intervention against ISIS could give the upper hand to Iraqi security forces on the ground. But air power alone won't decide the battle against the jihadist group, says Karl Mueller. http://on.rand.org/yc6jH", 45 | 'license': "unknown", 46 | 'language': { 47 | 'code': "en", 48 | 'name': "English", 49 | 'nativeName': "English" 50 | }, 51 | 'tags': [ 52 | { 53 | 'name': "Christianity", 54 | 'confidence': 1 55 | }, 56 | { 57 | 'name': "deportation", 58 | 'confidence': 1 59 | }, 60 | { 61 | 'name': "conflict", 62 | 'confidence': 1 63 | } 64 | ], 65 | 'geo': { 66 | 'addressComponents': { 67 | 'formattedAddress': "Kabul, Kabol, Afghanistan" 68 | } 69 | }, 70 | 'lifespan': "temporary" 71 | } 72 | 73 | item = app.item_collection.make_model(data) 74 | saved = item.save(refresh=True) 75 | 76 | app.work(json.dumps({"id":str(saved['_id'])})) 77 | doc = source(app.item_collection, saved['_id'])() 78 | 79 | assert doc['remoteID'] == random_id 80 | assert 'Iraqi' in doc['entities'] 81 | assert 'photo-person' in [tag['name'] for tag in doc['tags']] 82 | 83 | rand_id2 = str(uuid.uuid4()) 84 | data2 = { 85 | 'remoteID': rand_id2, 86 | 'content': "RT @dharmabum1: @chrisdbianchi @USTornadoes here's another shot of the tornado. It touched down near the Denver jail. http://t.co/wRQiGuOIVC", 87 | 'source': "twitter", 88 | 'image': 'http://pbs.twimg.com/media/Btqddf2CIAIVDSo.jpg', 89 | 'summary': "RT @dharmabum1: @chrisdbianchi @USTornadoes here's another shot of the tornado. It touched down near the Denver jail. 
http://t.co/wRQiGuOIVC", 90 | 'license': "unknown", 91 | 'language': { 92 | 'code': "en", 93 | 'name': "English", 94 | 'nativeName': "English" 95 | }, 96 | 'tags': [ 97 | { 98 | 'name': "weather", 99 | 'confidence': 1 100 | } 101 | ], 102 | 'lifespan': "temporary" 103 | } 104 | 105 | item = app.item_collection.make_model(data2) 106 | saved = item.save(refresh=True) 107 | 108 | app.work(json.dumps({"id":str(saved['_id'])})) 109 | doc = source(app.item_collection, saved['_id'])() 110 | 111 | assert doc['remoteID'] == rand_id2 112 | 113 | assert 'Weather' not in doc['entities'] 114 | #assert 'tornado' in [tag['name'] for tag in doc['tags']] 115 | -------------------------------------------------------------------------------- /tests/test_relevance_classifier.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | from src.tasks import relevance_classifier 3 | import datetime 4 | 5 | data = {u'remoteID': u'132', u'language': {u'nativeName': u'English', u'code': u'en', u'name': u'English'}, u'license': u'unknown', u'tags': [], u'publishedAt': datetime.datetime(2011, 1, 5, 8, 0), u'summary': u'Traffic death : Pedestrian was knocked down by a matatu.', u'content': u'Traffic Accident: Pedestrian was knocked down by a matatu clashes.', u'source': u'kenya-traffic-incidents-2011', u'__v': 0, u'lifespan': u'temporary', u'updatedAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 832000), u'_id': '530a1cf010a84e0000392a65', u'geo': {u'addressComponents': {u'formattedAddress':'Yala,Siaya,Siaya,Kenya', u'adminArea1': u'Kenya', u'neighborhood': u'Yala', u'adminArea4': u'Siaya', u'adminArea5': u'Siaya'}}, u'createdAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 831000)} 6 | data['searchText'] = data['content'] + ' ' + data['summary'] 7 | 8 | def get_tags(): 9 | return [ 10 | {'name': 'death'}, 11 | {'name': 'conflict'} 12 | ] 13 | 14 | kwargs = { 15 | 'get_tags': get_tags 16 | } 17 | 18 | run = relevance_classifier.setup(**kwargs) 19 | 
data = run(data) 20 | 21 | tag_names = [tag['name'] for tag in data['tags']] 22 | 23 | assert 'death' in tag_names 24 | assert 'conflict' in tag_names -------------------------------------------------------------------------------- /tests/test_reverse_geocode.py: -------------------------------------------------------------------------------- 1 | def test(): 2 | from src.tasks import reverse_geocode 3 | import datetime 4 | 5 | data = {u'remoteID': u'132', u'language': {u'nativeName': u'English', u'code': u'en', u'name': u'English'}, u'license': u'unknown', u'tags': [u'death', u'accident', u'road', u'injury'], u'publishedAt': datetime.datetime(2011, 1, 5, 8, 0), u'summary': u'Traffic Accident: Pedestrian was knocked down by a matatu.', u'content': u'Traffic Accident: Pedestrian was knocked down by a matatu.', u'source': u'kenya-traffic-incidents-2011', u'__v': 0, u'lifespan': u'temporary', u'updatedAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 832000), u'_id': '530a1cf010a84e0000392a65', u'geo': {u'coords': [36.733132, -1.308187] }, u'createdAt': datetime.datetime(2014, 2, 23, 16, 8, 16, 831000)} 6 | 7 | data = reverse_geocode.run(data) 8 | 9 | assert 'adminArea5' in data['geo']['addressComponents'] 10 | assert 'adminArea1' in data['geo']['addressComponents'] 11 | assert data['geo']['addressComponents']['adminArea1'] == 'Kenya' --------------------------------------------------------------------------------