├── .coveragerc ├── .editorconfig ├── .eslintignore ├── .eslintrc.js ├── .gitignore ├── .gitlab-ci.yml ├── .htmllintrc ├── .stylelintrc.js ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── doc └── images │ ├── nlg-ide-input.png │ ├── nlg-ide-toplist.gif │ └── nlg-template-settings.png ├── examples └── intro-narrative-api.ipynb ├── nlg ├── __init__.py ├── app │ ├── __init__.py │ ├── body.html │ ├── error │ │ ├── 400.html │ │ ├── 401.html │ │ ├── 403.html │ │ ├── 404.html │ │ └── 500.html │ ├── gramex.yaml │ ├── html │ │ ├── demo.html │ │ └── template-navbar.html │ ├── index.html │ ├── login.html │ ├── nlg.js │ ├── setup.sh │ ├── style.css │ ├── template-navbar.html │ └── templates │ │ ├── demo.tmpl │ │ ├── new-variable.tmpl │ │ ├── template-settings.tmpl │ │ └── variable-settings.tmpl ├── grammar.py ├── narrative.py ├── search.py ├── tests │ ├── __init__.py │ ├── data │ │ ├── actors.csv │ │ └── imdb_ratings.csv │ ├── test_grammar.py │ ├── test_narrative.py │ ├── test_search.py │ ├── test_utils.py │ └── test_webapp.py ├── utils.py └── webapp.py ├── requirements.txt ├── setup.cfg └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | show_missing = True 3 | skip_covered = True -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # .editorconfig maintains consistent coding styles between different editors. 2 | # Get plugins at http://editorconfig.org/ 3 | # - Sublime text: https://github.com/sindresorhus/editorconfig-sublime 4 | # - Notepad++: https://github.com/editorconfig/editorconfig-notepad-plus-plus 5 | 6 | root = true 7 | 8 | # Apply common styles for most standard code files. 9 | # Do not apply to * - that covers binary files as well 10 | [*.{js,html,php,py,css,svg,json,less,yaml,yml,scss,xml,sh,java,bat,R,tmpl}] 11 | end_of_line = lf 12 | insert_final_newline = true 13 | trim_trailing_whitespace = true 14 | charset = utf-8 15 | # Stick to 2-space indenting by default, to conserve space 16 | indent_style = space 17 | indent_size = 2 18 | 19 | [*.py] 20 | indent_size = 4 21 | 22 | [Makefile] 23 | indent_style = tab 24 | indent_size = 4 25 | 26 | [testlib/test_config/config.empty.yaml] 27 | insert_final_newline = false 28 | [tests/dir/gramex.yaml] 29 | insert_final_newline = false 30 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | **/node_modules/* 2 | app/node_modules/* 3 | docs/* 4 | 5 | # Our Gitlab runner uses eslint@2.6.0 to allow eslint-template. 
6 | # chromecapture.js requires ecmaVersion 8 which eslint@2.6.0 does not support 7 | # So let's not eslint that 8 | gramex/apps/capture/chromecapture.js 9 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "plugins": [ 3 | "template" // Handle Tornado templates and JS in HTML files 4 | ], 5 | "env": { 6 | "es6": true, // Allow ES6 in JavaScript 7 | "browser": true, // Include browser globals 8 | "jquery": true, // Include jQuery and $ 9 | "mocha": true // Include it(), assert(), etc 10 | }, 11 | "globals": { 12 | "_": true, // underscore.js 13 | "d3": true, // d3.js 14 | "vg": true, // vega.js 15 | "L": true, // leaflet.js 16 | "ga": true, // Google analytics 17 | "G": true, // G.min.js 18 | "topojson": true, // topojson.js 19 | "moment": true, // moment.js 20 | "numeral": true, // numeral.js 21 | "assert": true // chai.js 22 | }, 23 | "extends": "eslint:recommended", 24 | "rules": { 25 | /* Override default rules */ 26 | "indent": ["off", 2], // We eventually want 2 space indentation 27 | "linebreak-style": ["off", "unix"], // We eventually want UNIX style line 28 | "quotes": ["off", "double"], // We may go for a double-quotes style 29 | "semi": ["off", "never"] // We may go for a no-semicolon style 30 | } 31 | }; 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | TODO 2 | 3 | # Ignore auto-generated documentation 4 | docs/gramex*.rst 5 | docs/modules.rst 6 | 7 | # Ignore files generated by testcases 8 | tests/**/*.test 9 | tests/**/gen.* 10 | testlib/**/gen.* 11 | .noseids 12 | 13 | # Ignore byte-compiled / optimised / DLL files 14 | *.py[cod] 15 | 16 | # Filenames should NOT have spaces 17 | * * 18 | 19 | # Ignore SQLite3 files. 
Gramex creates some automatically 20 | *.sqlite3 21 | *.sqlite3-journal 22 | 23 | # Cache folders used for testing 24 | .cache-* 25 | .pytest_cache 26 | 27 | # Ignore log files 28 | *.log* 29 | 30 | # Don't commit data files, except what's required for testing or by Gramex apps 31 | *.csv 32 | *.xls* 33 | !tests/**/*.csv 34 | !tests/*.xlsx 35 | !testlib/**/*.csv 36 | !testlib/*.xlsx 37 | !gramex/apps/**/*.csv 38 | !gramex/apps/guide/**/*.xlsx 39 | 40 | *.ppt* 41 | !testlib/input.pptx 42 | !tests/template.pptx 43 | !gramex/apps/guide/formhandler/input.pptx 44 | !gramex/apps/guide/pptxhandler/examples-input.pptx 45 | 46 | # Don't commit databases created by test cases 47 | tests/*.db 48 | 49 | # Don't commit uploads created by test cases 50 | tests/uploads 51 | 52 | # Don't commit ZIP files, except what's required for testing 53 | *.7z 54 | *.zip 55 | !tests/*.zip 56 | 57 | # Documents 58 | *.doc* 59 | *.pdf 60 | 61 | # Avoid media files 62 | *.avi 63 | *.mp* 64 | *.wmv 65 | 66 | # Backup files 67 | ~$* 68 | *~ 69 | *.bak* 70 | 71 | # Sublime-text workspaces, etc 72 | *.sublime-* 73 | .vscode/ 74 | .vim/ 75 | 76 | # IPython Notebook checkpoints 77 | .ipynb_checkpoints 78 | 79 | # Typically bash.exe.stackdump on Cygwin 80 | *.stackdump 81 | 82 | # Node modules and bower components 83 | node_modules 84 | bower_components 85 | 86 | # Prefer yarn.lock over package-lock.json 87 | package-lock.json 88 | 89 | # Windows shortcut files 90 | *.lnk 91 | 92 | # Windows / Mac OS junk files 93 | Desktop.ini 94 | $RECYCLE.BIN/ 95 | *[Tt]humbs.db 96 | *.DS_Store 97 | 98 | # R history files 99 | .RHistory 100 | 101 | # C extensions 102 | *.so 103 | 104 | # Packages 105 | *.egg 106 | *.eggs 107 | *.egg-info 108 | dist 109 | build 110 | eggs 111 | parts 112 | bin 113 | var 114 | sdist 115 | develop-eggs 116 | .installed.cfg 117 | lib 118 | lib64 119 | 120 | # Installer logs 121 | pip-log.txt 122 | 123 | # Unit test / coverage reports 124 | .coverage 125 | .tox 126 | nosetests.xml 127 | htmlcov 128 | cover 129 | 130 | # Translations 131 | *.mo 132 | 133 | # Mr Developer 134 | .mr.developer.cfg 135 | .project 136 | .pydevproject 137 | 138 | # Pycharm 139 | .idea 140 | 141 | # Complexity 142 | output/*.html 143 | output/*/index.html 144 | 145 | # Sphinx 146 | docs/_build 147 | 148 | # For Linux FUSE file system 149 | .fuse_hidden* 150 | 151 | # IDE 152 | .vim 153 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | validate: 2 | tags: [py3] 3 | script: 4 | - python setup.py develop 5 | - python -m spacy download en 6 | - gramex license accept 7 | - nosetests -sv --with-coverage --cover-package=nlg 8 | 9 | 10 | deploy: 11 | stage: deploy 12 | script: deploy 13 | only: [dev] 14 | tags: [py3] 15 | variables: 16 | SERVER: ubuntu@uat.gramener.com # Deploy to uat.gramener.com/app-name/ 17 | URL: nlg-tmplgen 18 | SETUP: sh setup.sh 19 | VERSION: py3v1 20 | PORT: 8040 21 | -------------------------------------------------------------------------------- /.htmllintrc: -------------------------------------------------------------------------------- 1 | // NLG .htmllintrc v1.2 2 | { 3 | "plugins": [], 4 | 5 | "attr-bans": [ 6 | "align", 7 | "background", 8 | "border", 9 | // "frameborder", // frameborder is used in YouTube embeds 10 | "longdesc", 11 | "marginwidth", 12 | "marginheight", 13 | "scrolling" 14 | ], 15 | "attr-name-style": false, 16 | "attr-no-dup": false, // attr name may be 
computed, and get replaced by {} 17 | "attr-no-unsafe-char": false, // title contains single quotes ' 18 | "attr-quote-style": "double", // attributes contain double quotes 19 | "attr-req-value": false, 20 | "class-no-dup": true, // no duplicate classes in a tag 21 | "doctype-first": false, // snippet templates need not begin with doctype 22 | "doctype-html5": true, 23 | "fig-req-figcaption": false, 24 | "focusable-tabindex-style": false, 25 | "head-req-title": false, // title may be inside a Block.run() 26 | "href-style": false, 27 | "html-req-lang": false, 28 | "id-class-ignore-regex": "\\{ *\\}", // ignore tornado template id / class 29 | "id-class-no-ad": false, 30 | "id-class-style": false, // no styles enforced for now 31 | "id-no-dup": false, // template replacement IDs { } cause duplication 32 | "img-req-alt": "allownull", // for dynamic image content 33 | "img-req-src": false, 34 | "indent-style": "spaces", 35 | "indent-width": 2, 36 | "label-req-for": false, // cannot use if multiple forms with same key 37 | "line-end-style": false, // raises too many errors 38 | "raw-ignore-regex": "<%.*?%>\\s*|{[%#{].*?[%#}]}\\s*", // ignore templates 39 | "spec-char-escape": false, // using > or < is not that big a deal 40 | "table-req-caption": false, 41 | "tag-bans": [ 42 | // "b", // Bootstrap caret example uses 43 | // "i", // Font-awesome icons use 44 | "s", // avoid strike tag, deprecated 45 | // "style", // Single-page templates need style tag 46 | "u", 47 | "strike", 48 | "font", 49 | "center" 50 | ], 51 | "tag-name-lowercase": true, 52 | "tag-name-match": true, 53 | "tag-self-close": false, 54 | "title-max-len": false, // we sometimes have tables inside the title="" 55 | "title-no-dup": true 56 | } 57 | -------------------------------------------------------------------------------- /.stylelintrc.js: -------------------------------------------------------------------------------- 1 | "use strict" 2 | 3 | module.exports = { 4 | rules: { 5 | "at-rule-no-unknown": true, 6 | "block-no-empty": true, 7 | "color-no-invalid-hex": true, 8 | "comment-no-empty": true, 9 | "declaration-block-no-duplicate-properties": [ 10 | true, 11 | { 12 | ignore: ["consecutive-duplicates-with-different-values"] 13 | } 14 | ], 15 | "declaration-block-no-shorthand-property-overrides": true, 16 | "font-family-no-duplicate-names": true, 17 | "font-family-no-missing-generic-family-keyword": true, 18 | "function-calc-no-unspaced-operator": true, 19 | "function-linear-gradient-no-nonstandard-direction": true, 20 | "keyframe-declaration-no-important": true, 21 | "media-feature-name-no-unknown": true, 22 | "no-descending-specificity": true, 23 | "no-duplicate-at-import-rules": true, 24 | "no-duplicate-selectors": true, 25 | "no-empty-source": true, 26 | "no-extra-semicolons": true, 27 | "no-invalid-double-slash-comments": true, 28 | "property-no-unknown": true, 29 | "selector-pseudo-class-no-unknown": true, 30 | "selector-pseudo-element-no-unknown": true, 31 | "selector-type-no-unknown": true, 32 | "string-no-newline": true, 33 | "unit-no-unknown": true 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Config file for automatic testing at travis-ci.org 2 | 3 | # Run in Python 3 only. 
Drop Python 2 testing 4 | language: python 5 | python: '3.7' 6 | 7 | dist: xenial 8 | sudo: yes 9 | 10 | # Cache modules for faster builds 11 | cache: 12 | timeout: 1000 13 | pip: true 14 | npm: true 15 | yarn: true 16 | # Don't cache miniconda directory. It's slower. Fresh install takes ~200s. 17 | # But caching takes ~150s (extraction) + ~190s (re-packing) = ~340s (slower). 18 | # directories: 19 | # - $HOME/miniconda 20 | 21 | install: 22 | # Install miniconda 23 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O $HOME/miniconda.sh 24 | - bash $HOME/miniconda.sh -b -u -p $HOME/miniconda 25 | - export PATH="$HOME/miniconda/bin:$PATH" 26 | - hash -r 27 | - conda config --set always_yes yes --set changeps1 no 28 | # Install pip modules 29 | - pip install flake8 pep8-naming flake8-gramex flake8-blind-except flake8-print flake8-debugger nose coverage 30 | - npm install -g yarn 31 | - yarn global add eclint eslint eslint-plugin-html eslint-plugin-template htmllint-cli 32 | - yarn install 33 | # Set up variables 34 | - export BRANCH=$TRAVIS_BRANCH 35 | 36 | script: 37 | - eclint check '**/*.html' '**/*.js' '**/*.css' '**/*.yaml' '**/*.md' 38 | - htmllint 39 | - flake8 40 | - bandit nlg --recursive --format csv || true 41 | - pip install -e . 42 | - gramex setup nlg/app 43 | - nosetests -sv --with-coverage --cover-package=nlg 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Gramex-NLG is licensed under the [MIT License][1] 2 | 3 | Copyright (c) 2019, Gramener 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | 23 | Gramex includes [third party libraries][2] with permissive licenses. 24 | 25 | [1]: https://opensource.org/licenses/MIT 26 | [2]: https://learn.gramener.com/guide/license/thirdparty.md 27 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft nlg/app 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |Build Status| 2 | 3 | nlg 4 | === 5 | 6 | Natural Language Generation component for 7 | `Gramex `__. The NLG module is 8 | designed to work as a Python library, as well as a `Gramex 9 | application `__. 
10 | 11 | The library: 12 | 13 | 1. Automatically creates Tornado templates from English text in the 14 | context of a dataset. 15 | 2. Allows for modification and generalization of these templates. 16 | 3. Renders these templates as a unified narrative. 17 | 18 | Installation 19 | ------------ 20 | 21 | The NLG library can be installed from PyPI as follows: 22 | 23 | .. code:: bash 24 | 25 | $ pip install nlg 26 | $ python -m spacy download en_core_web_sm 27 | $ gramex setup ui 28 | 29 | or from source as follows: 30 | 31 | .. code:: bash 32 | 33 | $ git clone https://github.com/gramener/gramex-nlg.git 34 | $ cd gramex-nlg 35 | $ pip install -e . 36 | $ gramex setup ./app 37 | 38 | Usage 39 | ----- 40 | 41 | Using the Python library 42 | ~~~~~~~~~~~~~~~~~~~~~~~~ 43 | 44 | To get started, see the `example notebook here `_. 45 | 46 | .. code:: python 47 | 48 | >>> import pandas as pd 49 | >>> from gramex import data 50 | 51 | >>> # load some data 52 | >>> df = pd.read_csv('iris.csv') 53 | 54 | >>> # specify a FormHandler operation - find the average sepal_width per species 55 | >>> fh_args = {'_by': ['species'], '_c': ['sepal_width|avg'], '_sort': ['sepal_width|avg']} 56 | 57 | >>> # Draw a sample 58 | >>> xdf = df.sample(frac=0.1, random_state=10) 59 | 60 | >>> # perform the FormHandler operation on the data 61 | >>> print(data.filter(xdf, fh_args.copy())) 62 | species sepal_width|avg 63 | 2 virginica 2.70 64 | 1 versicolor 2.92 65 | 0 setosa 3.15 66 | 67 | >>> # Write something about the output 68 | >>> from nlg.utils import load_spacy_model 69 | >>> nlp = load_spacy_model() >>> text = nlp("The virginica species has the least average sepal_width.") 70 | 71 | >>> # Generate a template 72 | >>> from nlg.search import templatize 73 | >>> tmpl = templatize(text, fh_args, xdf) 74 | >>> print(tmpl) 75 | {% set fh_args = {"_by": ["species"], "_c": ["sepal_width|avg"], "_sort": ["sepal_width|avg"]} %} 76 | {% set df = U.gfilter(orgdf, fh_args.copy()) %} 77 | {% set fh_args = U.sanitize_fh_args(fh_args, orgdf) %} 78 | The {{ df["species"].iloc[0] }} species has the least average {{ fh_args["_sort"][0].lower() }}. 79 | 80 | >>> # Render the same template with new data. 81 | >>> print(render(df, tmpl).decode('utf8')) 82 | The versicolor species has the least average sepal_width|avg. 83 | 84 | Using the NLG IDE 85 | ~~~~~~~~~~~~~~~~~ 86 | 87 | The NLG module ships with an IDE. The IDE is a `Gramex 88 | application `__. 89 | 90 | To use it, install the NLG module as indicated above, and add the 91 | following to your ``gramex.yaml``: 92 | 93 | .. code:: yaml 94 | 95 | variables: 96 | NLG_ROOT: 97 | function: nlg.utils._locate_app_config() 98 | 99 | import: 100 | nlg: 101 | path: $NLG_ROOT 102 | YAMLURL: $YAMLURL/nlg 103 | 104 | This configuration mounts the app at the ``/nlg/`` resource. Start Gramex to access it. 105 | 106 | The Gramex NLG IDE 107 | ------------------ 108 | 109 | The NLG component depends on two sources of information: 110 | 111 | 1. A source dataset, which can be uploaded to the IDE. A dataset is 112 | uniquely identified with its filename. Once uploaded, the file 113 | persists and is available for selection from the app. Any *file* that 114 | makes a valid URL for 115 | `FormHandler `__ can be 116 | used with the NLG app. 117 | 2. A *narrative*, which is a collection of templates and rules around 118 | them. The narrative consists of the configuration which governs the 119 | rendered text. An existing narrative can be uploaded through the "Add 120 | Data" button, or can be created through the IDE.
Once created, the 121 | narrative can be named and becomes available for selection from the 122 | "Add Data" modal. 123 | 124 | The NLG IDE 125 | ----------- 126 | 127 | The primary purpose of the IDE is to create or edit narratives based on 128 | a dataset. Once a dataset has been selected, it is exposed in the IDE as 129 | a `FormHandler 130 | table `__. 131 | 132 | .. figure:: doc/images/nlg-ide-input.png 133 | :alt: 134 | 135 | Users can now type English text into the IDE and add it to the 136 | narrative. This automatically templatizes the text, and adds the 137 | template to the narrative. For example, typing "Humphrey Bogart is at 138 | the top of the list." does this: 139 | 140 | .. figure:: doc/images/nlg-ide-toplist.gif 141 | :alt: 142 | 143 | This means that the input statement has been templatized and added to 144 | the narrative. The part of the input text that was successfully 145 | templatized is highlighted in green. Clicking on the spanner button next 146 | to a template opens the `Template Settings <#template-settings>`__ 147 | modal. 148 | 149 | Template Settings 150 | ----------------- 151 | 152 | .. figure:: doc/images/nlg-template-settings.png 153 | :alt: 154 | 155 | This dialog provides configuration options for all template attributes: 156 | 157 | 1. **Template Name** - Each template can optionally be named. 158 | 2. **Condition** - Any Python expression which evaluates to a boolean 159 | may be set as a condition, which controls whether the template is 160 | rendered. 161 | 3. The actual Tornado template itself can be edited. Any valid Tornado 162 | template is acceptable. 163 | 4. **Token Settings** - Every token from the input text that finds a 164 | match in the dataset or in FormHandler arguments (i.e. every token 165 | that is highlighted in the preview) is converted into a `template 166 | expression `__. 167 | Such tokens have their own attributes, as follows: 168 | 169 | - **Token search results** - if a token is found in more than one 170 | place (say, a dataframe cell as well as a FormHandler argument), 171 | this setting allows the user to select the right result. 172 | - **Grammar options** - the NLG engine may automatically apply 173 | certain string formatting or lexical operations to the template 174 | expression to make it match the input text. Any number of these 175 | operations can be enabled / disabled through this setting. 176 | - **Make variable** - a token may be set as a local variable within 177 | the template. 178 | - **Ignore** - the template expression corresponding to the token 179 | may be ignored, and set back to the literal input text. 180 | 181 | 5. **Run Template** - Run the current template against the dataframe and 182 | preview its output. 183 | 6. **Save Template** - Save the template. Note that this is required if 184 | the template has been manually edited in the textarea. 185 | 186 | Naming and Saving a Narrative 187 | ----------------------------- 188 | 189 | Once a narrative has been fully configured, it can be named and saved. 190 | Doing so causes it to appear in the narrative dropdown menu on the app. 191 | 192 | Sharing a Narrative 193 | ------------------- 194 | 195 | After a narrative has been named and saved, it can be shared in two modes: 196 | 197 | 1. **IDE mode** - This option lets users copy a URL that redirects to 198 | the IDE, with the current dataset and the current narrative set in 199 | the session. 200 | 2. **Embed mode** - Copy an HTML snippet to embed into a page which 201 | contains a FormHandler table.
The template will render live as the 202 | table changes. 203 | 204 | 205 | Glossary: Grammar of Data-Driven Narratives 206 | =========================================== 207 | 208 | This section describes the building blocks of Gramex's approach to natural language generation. 209 | These concepts serve as primitives to the logic and automation capabilities of the NLG engine. 210 | 211 | 1. **Narrative** - A *narrative* is a piece of text written by a user or generated by a machine which contains facts about a dataset. 212 | A narrative in its entirety is assumed to be a function of three items: 213 | 214 | a. A dataset 215 | b. Operations on that dataset 216 | c. Some "source text" provided by the user. 217 | 218 | For example, the following is a narrative about the `Fisher Iris dataset `_. 219 | 220 | The iris dataset contains measurements from a hundred and fifty samples of three unique species of the iris flower - setosa, versicolor and virginica. The species are equally distributed within the dataset, so that each species has fifty samples. For each sample, four measurements are taken - sepal length, sepal width, petal length and petal width. The average petal length of the setosa is significantly less than that of versicolor or virginica. The average petal width of virginica is much higher than that of versicolor. However, there is no pair of features that can uniquely identify a species. The presence of such properties makes the iris dataset ideal for explaining machine learning concepts. 221 | 222 | 2. **Nugget** - A *nugget* is ideally a single sentence which conveys a fact about the data. Each sentence in the example narrative except the last two is a nugget. Note that each nugget derives its facts from the source data directly, or from the result of some operation on the data. For example, the following nugget 223 | 224 | The average petal length of the setosa is significantly less than that of versicolor or virginica. 225 | 226 | derives from a groupby-and-average operation on one column of the dataset. Some nuggets, like the one enumerating the number of samples in the dataset, derive from the raw dataset, *not* from the result of any operations on it. A narrative is essentially an ordered collection of nuggets. 227 | 228 | 3. **Variables** - A *variable* is a piece of text which can change with the data or the operations performed on it. Here is a reproduction of the example narrative, with all variables shown in bold. 229 | 230 | The iris dataset contains measurements from **a hundred and fifty** samples of **three** unique species of the iris flower - **setosa, versicolor and virginica**. The species are equally distributed within the dataset, so that each species has **fifty** samples. For each sample, **four** measurements are taken - **sepal length, sepal width, petal length and petal width**. The **average petal length** of the setosa is significantly **less** than that of versicolor or virginica. The **average petal width** of virginica is much **higher** than that of versicolor. However, there is no pair of features that can uniquely identify a species. The presence of such properties makes the iris dataset ideal for explaining machine learning concepts. 231 | 232 | Note that each variable has two defining components: 233 | 234 | * a *source text*, as initially provided by the user 235 | * one or more *formulae*, which compute the value of the variable for a specific instance of the data.
Note that the source text of a variable may be found in multiple places within a dataset, and as such, a variable may have multiple formulae - one of which will have to be preferred by the user. 236 | 237 | For example, for the first variable in the example narrative, "hundred and fifty" is the source text, and the formula is any machine code that counts the number of rows in the dataset and translates it into a human-readable form. A variable may additionally have other attributes, like: 238 | 239 | * a set of linguistic *inflections* which determine the form of the rendered variable text - these are distinct from the formula itself, in that the formula creates the base form of the text and inflections modify the base form. 240 | * a *name* used to identify the variable within the template of the nugget 241 | 242 | 243 | Thus, narratives are composed from nuggets, and nuggets from variables. This grammar allows the NLG engine to approach the problem of data-driven, machine-generated narratives in a more *compositional* manner than a *generative* one. 244 | 245 | .. |Build Status| image:: https://travis-ci.org/gramener/gramex-nlg.svg?branch=dev 246 | :target: https://travis-ci.org/gramener/gramex-nlg 247 | 248 | -------------------------------------------------------------------------------- /doc/images/nlg-ide-input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gramener/gramex-nlg/aa508f66b6ce38eeb3ea3deac96f458865b896ff/doc/images/nlg-ide-input.png -------------------------------------------------------------------------------- /doc/images/nlg-ide-toplist.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gramener/gramex-nlg/aa508f66b6ce38eeb3ea3deac96f458865b896ff/doc/images/nlg-ide-toplist.gif -------------------------------------------------------------------------------- /doc/images/nlg-template-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gramener/gramex-nlg/aa508f66b6ce38eeb3ea3deac96f458865b896ff/doc/images/nlg-template-settings.png -------------------------------------------------------------------------------- /examples/intro-narrative-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Introduction to NLG's Narrative API\n", 8 | "===================================\n", 9 | "\n", 10 | "This notebook is an introduction to Gramex NLG's Narrative API. Here we will learn how to create data-driven narratives with the NLG module, by going over the building blocks of the API.\n", 11 | "\n", 12 | "Getting Started\n", 13 | "---------------\n", 14 | "\n", 15 | "If the NLG module is not installed, install it as follows:\n", 16 | "\n", 17 | "```bash\n", 18 | "$ pip install nlg\n", 19 | "```" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "Test the installation by running the following cell:" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from nlg.search import templatize\n", 36 | "import pandas as pd" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "Next, let's load some data.
For this tutorial, we will be using [this](https://raw.githubusercontent.com/gramener/gramex-nlg/master/nlg/tests/data/actors.csv) dataset. Please download the file and load it as a pandas dataframe." 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/html": [ 54 | "
\n", 55 | "\n", 68 | "\n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | "
categorynameratingvotes
0ActorsHumphrey Bogart0.570197109
1ActorsCary Grant0.438602142
2ActorsJames Stewart0.988374120
3ActorsMarlon Brando0.102045108
4ActorsFred Astaire0.20887784
5ActressesKatharine Hepburn0.03918863
6ActressesBette Davis0.28280714
7ActressesAudrey Hepburn0.12019794
8ActressesIngrid Bergman0.29614052
9ActorsSpencer Tracy0.466311192
10ActorsCharlie Chaplin0.24442676
\n", 158 | "
" 159 | ], 160 | "text/plain": [ 161 | " category name rating votes\n", 162 | "0 Actors Humphrey Bogart 0.570197 109\n", 163 | "1 Actors Cary Grant 0.438602 142\n", 164 | "2 Actors James Stewart 0.988374 120\n", 165 | "3 Actors Marlon Brando 0.102045 108\n", 166 | "4 Actors Fred Astaire 0.208877 84\n", 167 | "5 Actresses Katharine Hepburn 0.039188 63\n", 168 | "6 Actresses Bette Davis 0.282807 14\n", 169 | "7 Actresses Audrey Hepburn 0.120197 94\n", 170 | "8 Actresses Ingrid Bergman 0.296140 52\n", 171 | "9 Actors Spencer Tracy 0.466311 192\n", 172 | "10 Actors Charlie Chaplin 0.244426 76" 173 | ] 174 | }, 175 | "execution_count": 2, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "# Replace the path with wherever you have downloaded the dataset.\n", 182 | "df = pd.read_csv('../nlg/tests/data/actors.csv')\n", 183 | "df" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "Let us now sort the dataframe by the `rating` column. NLG is designed to work with Gramex's [FormHandler](https://learn.gramener.com/guide/formhandler). Therefore, we will use FormHandler's own DSL to make any transformation on the dataset." 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 3, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "from gramex.data import filter as gfilter # do not clobber the `filter` function from the Python stdlib\n", 200 | "sort_args = {'_sort': ['-rating']}" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "Note that the `_sort` key in the dictionary above tells Gramex to sort a dataframe by the given columns. The value of they key is a _list_, indicating that dataframes can be sorted by multiple columns. Also, the hyphen before the column name indicates that the sorting is _descending_." 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 4, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "xdf = gfilter(df, sort_args.copy())" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 5, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/html": [ 227 | "
\n", 228 | "\n", 241 | "\n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | "
categorynameratingvotes
2ActorsJames Stewart0.988374120
0ActorsHumphrey Bogart0.570197109
9ActorsSpencer Tracy0.466311192
1ActorsCary Grant0.438602142
8ActressesIngrid Bergman0.29614052
\n", 289 | "
" 290 | ], 291 | "text/plain": [ 292 | " category name rating votes\n", 293 | "2 Actors James Stewart 0.988374 120\n", 294 | "0 Actors Humphrey Bogart 0.570197 109\n", 295 | "9 Actors Spencer Tracy 0.466311 192\n", 296 | "1 Actors Cary Grant 0.438602 142\n", 297 | "8 Actresses Ingrid Bergman 0.296140 52" 298 | ] 299 | }, 300 | "execution_count": 5, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "xdf.head()" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "Now, let's write something about this dataset. It is apparent that James Stewart has the highest rating." 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 6, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "from nlg.utils import load_spacy_model\n", 323 | "nlp = load_spacy_model()\n", 324 | "\n", 325 | "text = nlp(\"James Stewart is the actor with the highest rating.\")" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "The entry-point into the NLG module is the [`nlg.search.templatize`](https://github.com/gramener/gramex-nlg/blob/dev/nlg/search.py#L478) function. This function uses:\n", 333 | "* a dataframe\n", 334 | "* operations on the dataframe (as FormHandler arguments)\n", 335 | "* some text about the dataset\n", 336 | "\n", 337 | "to create a [`Nugget`](https://github.com/gramener/gramex-nlg/blob/dev/nlg/narrative.py#L102) object. To learn more about the `Nugget` object and it's methods, see the [README](https://github.com/gramener/gramex-nlg/tree/dev#glossary-grammar-of-data-driven-narratives)." 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 7, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "name": "stderr", 347 | "output_type": "stream", 348 | "text": [ 349 | "/home/jaidevd/src/nlg/nlg/search.py:62: UserWarning: Ignoring lemmatization.\n", 350 | " warnings.warn('Ignoring lemmatization.')\n", 351 | "/home/jaidevd/src/nlg/nlg/search.py:92: UserWarning: Ignoring lemmatization.\n", 352 | " warnings.warn('Ignoring lemmatization.')\n", 353 | "/home/jaidevd/src/nlg/nlg/search.py:80: FutureWarning: Series.nonzero() is deprecated and will be removed in a future version.Use Series.to_numpy().nonzero() instead\n", 354 | " indices = {array[i]: i for i in mask.nonzero()[0]}\n", 355 | "/home/jaidevd/src/nlg/nlg/search.py:109: UserWarning: Cannot lemmatize multi-word cells.\n", 356 | " warnings.warn('Cannot lemmatize multi-word cells.')\n" 357 | ] 358 | } 359 | ], 360 | "source": [ 361 | "nugget = templatize(text, sort_args, df)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 8, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "{% set fh_args = {\"_sort\": [\"-rating\"]} %}\n", 373 | "{% set df = U.gfilter(orgdf, fh_args.copy()) %}\n", 374 | "{% set fh_args = U.sanitize_fh_args(fh_args, orgdf) %}\n", 375 | "{# Do not edit above this line. #}\n", 376 | "{{ df[\"name\"].iloc[0] }} is the {{ G.singular(df[\"category\"].iloc[-2]).lower() }} with the highest rating." 
377 | ] 378 | }, 379 | "execution_count": 8, 380 | "metadata": {}, 381 | "output_type": "execute_result" 382 | } 383 | ], 384 | "source": [ 385 | "nugget" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "As we see, a nugget has an underlying [Tornado template](https://www.tornadoweb.org/en/stable/template.html) which has been auto-generated by the `templatize` function. Let's see how well this template re-renders on the dataset." 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 9, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "name": "stdout", 402 | "output_type": "stream", 403 | "text": [ 404 | "b' James Stewart is the actor with the highest rating.'\n" 405 | ] 406 | } 407 | ], 408 | "source": [ 409 | "print(nugget.render(df))" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "The text above is identical to the input text, but this is generated from a template. Essentially, we can pass any dataframe to the [`.render`](https://github.com/gramener/gramex-nlg/blob/dev/nlg/narrative.py#L190) method of the nugget object, and the text will be rendered in the context of that data. To test this, let's create a copy of the dataframe and give all the artists a random rating." 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 10, 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "import numpy as np\n", 426 | "np.random.seed(12345)\n", 427 | "\n", 428 | "fake_ratings = df.copy()\n", 429 | "fake_ratings['rating'] = np.random.rand(df.shape[0])" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "Let's see who the top rated artist is in this new, fake dataset." 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": 11, 442 | "metadata": {}, 443 | "outputs": [ 444 | { 445 | "data": { 446 | "text/html": [ 447 | "
\n", 448 | "\n", 461 | "\n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | "
categorynameratingvotes
6ActressesBette Davis0.96451514
0ActorsHumphrey Bogart0.929616109
8ActressesIngrid Bergman0.74890752
10ActorsCharlie Chaplin0.74771576
9ActorsSpencer Tracy0.653570192
\n", 509 | "
" 510 | ], 511 | "text/plain": [ 512 | " category name rating votes\n", 513 | "6 Actresses Bette Davis 0.964515 14\n", 514 | "0 Actors Humphrey Bogart 0.929616 109\n", 515 | "8 Actresses Ingrid Bergman 0.748907 52\n", 516 | "10 Actors Charlie Chaplin 0.747715 76\n", 517 | "9 Actors Spencer Tracy 0.653570 192" 518 | ] 519 | }, 520 | "execution_count": 11, 521 | "metadata": {}, 522 | "output_type": "execute_result" 523 | } 524 | ], 525 | "source": [ 526 | "fake_ratings.sort_values('rating', ascending=False).head()" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": {}, 532 | "source": [ 533 | "Now, let's see if our original nugget is able to adapt to this new dataset." 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 12, 539 | "metadata": {}, 540 | "outputs": [ 541 | { 542 | "data": { 543 | "text/plain": [ 544 | "b' Bette Davis is the actor with the highest rating.'" 545 | ] 546 | }, 547 | "execution_count": 12, 548 | "metadata": {}, 549 | "output_type": "execute_result" 550 | } 551 | ], 552 | "source": [ 553 | "nugget.render(fake_ratings)" 554 | ] 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "metadata": {}, 559 | "source": [ 560 | "Clearly, that is false. Bette Davis is the _actress_ with the highest rating. To see what went wrong, let's take a look at the template again." 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 13, 566 | "metadata": {}, 567 | "outputs": [ 568 | { 569 | "name": "stdout", 570 | "output_type": "stream", 571 | "text": [ 572 | "{% set fh_args = {\"_sort\": [\"-rating\"]} %}\n", 573 | "{% set df = U.gfilter(orgdf, fh_args.copy()) %}\n", 574 | "{% set fh_args = U.sanitize_fh_args(fh_args, orgdf) %}\n", 575 | "{# Do not edit above this line. #}\n", 576 | "{{ df[\"name\"].iloc[0] }} is the {{ G.singular(df[\"category\"].iloc[-2]).lower() }} with the highest rating.\n" 577 | ] 578 | } 579 | ], 580 | "source": [ 581 | "print(nugget.template)" 582 | ] 583 | }, 584 | { 585 | "cell_type": "markdown", 586 | "metadata": {}, 587 | "source": [ 588 | "As we can see, the words 'actor' or 'actress' don't appear in the template. This means that the template-generator has correctly figured out that these words are dependent on the transformed dataset. However, it has not managed to determine the exact formula for this.\n", 589 | "\n", 590 | "Any token in the input text which is data-dependent, is called a [`Variable`](https://github.com/gramener/gramex-nlg/blob/dev/nlg/narrative.py#L27). To see which words in a nugget are variables, take a look at the `.variables` attribute of the nugget." 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": 14, 596 | "metadata": {}, 597 | "outputs": [ 598 | { 599 | "data": { 600 | "text/plain": [ 601 | "{James Stewart: {{ df[\"name\"].iloc[0] }},\n", 602 | " actor: {{ G.singular(df[\"category\"].iloc[-2]).lower() }}}" 603 | ] 604 | }, 605 | "execution_count": 14, 606 | "metadata": {}, 607 | "output_type": "execute_result" 608 | } 609 | ], 610 | "source": [ 611 | "nugget.variables" 612 | ] 613 | }, 614 | { 615 | "cell_type": "markdown", 616 | "metadata": {}, 617 | "source": [ 618 | "We see here that there are two tokens from the original text - `\"James Stewart\"` and `\"actor\"` that have been identified as variables. Only, the Python _expression_ for determining one of them is wrong. 
Whether the highest rated artist is an actor or an actress needs to be found from the `\"category\"` column of the first row.\n", 619 | "\n", 620 | "To fix this, we can use the [`.set_expr`](https://github.com/gramener/gramex-nlg/blob/dev/nlg/narrative.py#L58) method of the respective variable. The `.set_expr` method accepts any valid Python expression as a string." 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 15, 626 | "metadata": {}, 627 | "outputs": [], 628 | "source": [ 629 | "var = nugget.get_var('actor')" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 16, 635 | "metadata": {}, 636 | "outputs": [], 637 | "source": [ 638 | "var.set_expr('df[\"category\"].iloc[0]')" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": 17, 644 | "metadata": {}, 645 | "outputs": [ 646 | { 647 | "data": { 648 | "text/plain": [ 649 | "{{ G.singular(df[\"category\"].iloc[0]).lower() }}" 650 | ] 651 | }, 652 | "execution_count": 17, 653 | "metadata": {}, 654 | "output_type": "execute_result" 655 | } 656 | ], 657 | "source": [ 658 | "var" 659 | ] 660 | }, 661 | { 662 | "cell_type": "markdown", 663 | "metadata": {}, 664 | "source": [ 665 | "Now that we have fixed the variable, let's re-render the nugget on the fake dataset." 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 18, 671 | "metadata": {}, 672 | "outputs": [ 673 | { 674 | "data": { 675 | "text/plain": [ 676 | "b' Bette Davis is the actress with the highest rating.'" 677 | ] 678 | }, 679 | "execution_count": 18, 680 | "metadata": {}, 681 | "output_type": "execute_result" 682 | } 683 | ], 684 | "source": [ 685 | "nugget.render(fake_ratings)" 686 | ] 687 | }, 688 | { 689 | "cell_type": "markdown", 690 | "metadata": {}, 691 | "source": [ 692 | "----" 693 | ] 694 | }, 695 | { 696 | "cell_type": "markdown", 697 | "metadata": {}, 698 | "source": [ 699 | "There is scope for yet more automation. Note that the last word in the text, \"rating\", matches the name of the column by which the dataframe has been sorted. Therefore, even that can be turned into a variable. Essentially, we want the template to render the name of whichever column is used to sort the data, in place of rating.\n", 700 | "\n", 701 | "New variables can be added to a nugget using the [`.add_var`](https://github.com/gramener/gramex-nlg/blob/dev/nlg/narrative.py#L236) method of the nugget object, as follows:" 702 | ] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "execution_count": 19, 707 | "metadata": {}, 708 | "outputs": [], 709 | "source": [ 710 | "var_token = text[-2]  # The spacy token corresponding to \"rating\"" 711 | ] 712 | }, 713 | { 714 | "cell_type": "code", 715 | "execution_count": 20, 716 | "metadata": {}, 717 | "outputs": [], 718 | "source": [ 719 | "var_expr = 'fh_args[\"_sort\"][0]'  # The Python expression to detect the sorted column" 720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "execution_count": 21, 725 | "metadata": {}, 726 | "outputs": [ 727 | { 728 | "data": { 729 | "text/plain": [ 730 | "{% set fh_args = {\"_sort\": [\"-rating\"]} %}\n", 731 | "{% set df = U.gfilter(orgdf, fh_args.copy()) %}\n", 732 | "{% set fh_args = U.sanitize_fh_args(fh_args, orgdf) %}\n", 733 | "{# Do not edit above this line. #}\n", 734 | "{{ df[\"name\"].iloc[0] }} is the {{ G.singular(df[\"category\"].iloc[0]).lower() }} with the highest {{ fh_args[\"_sort\"][0] }}."
735 ] 736 | }, 737 | "execution_count": 21, 738 | "metadata": {}, 739 | "output_type": "execute_result" 740 | } 741 | ], 742 | "source": [ 743 | "nugget.add_var(var_token, expr=var_expr)\n", 744 | "nugget" 745 | ] 746 | }, 747 | { 748 | "cell_type": "markdown", 749 | "metadata": {}, 750 | "source": [ 751 | "----\n", 752 | "Let us now test a scenario where we sort the dataframe by votes." 753 | ] 754 | }, 755 | { 756 | "cell_type": "code", 757 | "execution_count": 22, 758 | "metadata": {}, 759 | "outputs": [ 760 | { 761 | "data": { 762 | "text/plain": [ 763 | "{% set fh_args = {\"_sort\": [\"-votes\"]} %}\n", 764 | "{% set df = U.gfilter(orgdf, fh_args.copy()) %}\n", 765 | "{% set fh_args = U.sanitize_fh_args(fh_args, orgdf) %}\n", 766 | "{# Do not edit above this line. #}\n", 767 | "{{ df[\"name\"].iloc[0] }} is the {{ G.singular(df[\"category\"].iloc[0]).lower() }} with the highest {{ fh_args[\"_sort\"][0] }}." 768 | ] 769 | }, 770 | "execution_count": 22, 771 | "metadata": {}, 772 | "output_type": "execute_result" 773 | } 774 | ], 775 | "source": [ 776 | "nugget.fh_args = {'_sort': ['-votes']}\n", 777 | "nugget" 778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": 23, 783 | "metadata": {}, 784 | "outputs": [ 785 | { 786 | "data": { 787 | "text/plain": [ 788 | "b' Spencer Tracy is the actor with the highest votes.'" 789 | ] 790 | }, 791 | "execution_count": 23, 792 | "metadata": {}, 793 | "output_type": "execute_result" 794 | } 795 | ], 796 | "source": [ 797 | "nugget.render(df)" 798 | ] 799 | }, 800 | { 801 | "cell_type": "markdown", 802 | "metadata": {}, 803 | "source": [ 804 | "---" 805 | ] 806 | }, 807 | { 808 | "cell_type": "markdown", 809 | "metadata": {}, 810 | "source": [ 811 | "Now we know how to create templates from raw text, and how to assign tokens within the text as data-dependent variables. In forthcoming examples, we will explore:\n", 812 | "\n", 813 | "1. how to design more complex variable expressions - especially those that cannot be defined as short and simple Python strings\n", 814 | "2. how to create longer narratives by putting together different nuggets (see the sketch below)."
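,
"\n",
"\n",
"As a minimal sketch of that second point (an illustration assembled only from the API demonstrated in this notebook, not a dedicated narrative-composition API), several nuggets can be rendered against the same data and their outputs joined:\n",
"\n",
"```python\n",
"# Sketch: compose a narrative by rendering several nuggets on the same data.\n",
"# Assumes `df`, `nlp` and `templatize` from the cells above; the join is\n",
"# plain string handling, not an NLG API.\n",
"facts = [\n",
"    ('James Stewart is the actor with the highest rating.', {'_sort': ['-rating']}),\n",
"    ('Spencer Tracy is the actor with the highest votes.', {'_sort': ['-votes']}),\n",
"]\n",
"nuggets = [templatize(nlp(text), fh_args, df) for text, fh_args in facts]\n",
"narrative = ' '.join(n.render(df).decode('utf8').strip() for n in nuggets)\n",
"print(narrative)\n",
"```"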
815 | ] 816 | } 817 | ], 818 | "metadata": { 819 | "kernelspec": { 820 | "display_name": "Python 3", 821 | "language": "python", 822 | "name": "python3" 823 | }, 824 | "language_info": { 825 | "codemirror_mode": { 826 | "name": "ipython", 827 | "version": 3 828 | }, 829 | "file_extension": ".py", 830 | "mimetype": "text/x-python", 831 | "name": "python", 832 | "nbconvert_exporter": "python", 833 | "pygments_lexer": "ipython3", 834 | "version": "3.6.8" 835 | } 836 | }, 837 | "nbformat": 4, 838 | "nbformat_minor": 4 839 | } 840 | -------------------------------------------------------------------------------- /nlg/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | __version__ = '0.1.3' 4 | 5 | try: 6 | __NLG_SETUP__ 7 | except NameError: 8 | __NLG_SETUP__ = False 9 | 10 | 11 | if __NLG_SETUP__: 12 | sys.stderr.write('Partial import of nlg during the build process.\n') 13 | else: 14 | from .search import templatize # NOQA: F401 15 | from .grammar import get_gramopts 16 | grammar_options = get_gramopts() 17 | __all__ = ['templatize', 'grammar_options'] 18 | -------------------------------------------------------------------------------- /nlg/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gramener/gramex-nlg/aa508f66b6ce38eeb3ea3deac96f458865b896ff/nlg/app/__init__.py -------------------------------------------------------------------------------- /nlg/app/body.html: -------------------------------------------------------------------------------- 1 | {% set admin_kwargs = handler.kwargs.get('admin_kwargs', '') or {} %} 2 | 3 | {% from nlg.webapp import read_current_config, get_user_dir, is_user_authenticated %} 4 | {% set dsid = read_current_config(handler).get('dsid') %} 5 | 6 |
7 |
8 |
9 |
10 | 13 |
14 |
15 |
16 | 17 | 18 | 74 | 75 |
76 |
77 |
78 | 99 |
100 |
101 |
102 |
103 |
104 | 107 | 110 | 113 |
114 | 118 | 122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 | 139 |
140 |
141 | 142 | 143 |
144 |
145 |
146 | 147 | 149 |
150 |
151 |
152 | 156 |
157 |
158 | 160 |
161 |
162 | 164 |
165 |
166 |
167 |
168 | {% if dsid %} 169 | {% from os.path import basename %} 170 | {% set user_dir = basename(get_user_dir(handler)) %} 171 |
172 | {% end %} 173 | 174 | 182 | 183 | 196 | 197 | 198 | 215 | 216 | 217 | 232 |
233 | 234 | 235 | 236 | 237 | 279 | -------------------------------------------------------------------------------- /nlg/app/error/400.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bad Request 8 | 17 | 18 | 19 |

Bad request

20 | {% set exception = kwargs['exc_info'][1] %} 21 | 22 | 23 | {% for key, val in vars(exception).items() %} 24 | 25 | 26 | 27 | 28 | {% end %} 29 | 30 |
{{ key.replace('_', ' ').title() }}{{ val }}
31 | 32 | 33 | -------------------------------------------------------------------------------- /nlg/app/error/401.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Forbidden 8 | 15 | 16 | 17 |

You are not logged in

18 |

You must be logged in to perform this action.

19 | {% set login_url = handler.kwargs.get('login_url', None) %} 20 | {% import gramex %} 21 | {% set app_login = 'gramex.conf.app.settings.login_url' %} 22 |

Log in and try again. If that fails, contact the app owner.

23 |
HTTP 401: Unauthorized
24 | 25 | 26 | {# 27 | 28 | Note: this template is rarely called, because: 29 | 30 | - basehandler.py redirects to login_url if 401: UNAUTHORIZED 31 | - authhandler.py explicitly renders specific templates if 401: UNAUTHORIZED 32 | 33 | The rare cases where this is used are: 34 | 35 | - If an application explicitly raises a 401 36 | - If basehandler.py raises a 401 37 | - for an OTP request when user is not logged in 38 | - if the request is not GET/HEAD, it is not redirected to login_url 39 | 40 | #} 41 | -------------------------------------------------------------------------------- /nlg/app/error/403.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Forbidden 8 | 15 | 16 | 17 |

You need access

18 | {% try %}{% set xsrf = handler.check_xsrf_cookie() or True %}{% except %}{% set xsrf = False %}{% end %} 19 | {% if handler.request.method not in {'GET', 'HEAD', 'OPTIONS'} and not xsrf %} 20 |

Your app sent a {{ handler.request.method }} request without an XSRF cookie.

21 | {% elif handler.current_user %} 22 |

You are logged in, but as a user that cannot access this page.

23 | {% import yaml %} 24 |
{{ yaml.safe_dump(handler.current_user, default_flow_style=False) }}
25 | {% else %} 26 |

You are not logged in.

27 | {% end %} 28 | {% set login_url = None %} 29 | {% try %} 30 | {% set login_url = handler.kwargs.auth.login_url %} 31 | {% except %} 32 | {% try %} 33 | {% import gramex %} 34 | {% set login_url = gramex.conf.app.settings.login_url %} 35 | {% except %} 36 | {% end %} 37 | {% end %} 38 | {% if login_url %} 39 |

Try logging in again.

40 | {% end %} 41 |

Contact the app owner for more information.

42 |
HTTP 403: Forbidden
43 | 44 | 45 | -------------------------------------------------------------------------------- /nlg/app/error/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | File missing 8 | 15 | 16 | 17 |

File missing

18 |

This page cannot be found

19 |
{{ handler.request.uri }}
20 |

Details are on the server log.

21 |
HTTP 404: Not Found
22 | 23 | 24 | -------------------------------------------------------------------------------- /nlg/app/error/500.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Application error 8 | 18 | 19 | 20 |
21 |

Application error

22 |

This application made a mistake.

23 | {% set exception = kwargs['exc_info'][1] %} 24 | {% if hasattr(exception, 'reason') %} 25 |

Reason: {{ exception.reason }}

26 | {% end %} 27 |
{{ repr(exception) }}
28 |

Details are on the server log.

29 |
30 | 31 | {% from gramex import conf %} 32 | {% if conf.app.settings.serve_traceback %} 33 |
34 | {% import traceback %} 35 |
{{ ''.join(traceback.format_exception(*kwargs['exc_info'])) }}
36 |

Traceback is only for development. Disable it in gramex.yaml with app.settings.serve_traceback: false

37 |
38 |

Show traceback

39 | 47 | {% end %} 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /nlg/app/gramex.yaml: -------------------------------------------------------------------------------- 1 | # Configurable variables: 2 | # $NLG_AUTH: 3 | # Authentication is needed for saving and sharing narratives. 4 | # Set this variable to any valid auth configuration to use it 5 | # within the NLG app. 6 | variables: 7 | NLG_AUTH: /$YAMLURL/login/ 8 | NLG_BASE: /$YAMLURL/ 9 | 10 | import: 11 | ui: 12 | path: $GRAMEXAPPS/ui/gramex.yaml 13 | YAMLURL: $YAMLURL/ui/ 14 | languagetool: 15 | path: $GRAMEXAPPS/languagetool/gramex.yaml 16 | YAMLURL: $YAMLURL/languagetool/ 17 | 18 | url: 19 | demo-embed-$*: 20 | pattern: /$YAMLURL/demoembed 21 | handler: FileHandler 22 | kwargs: 23 | path: $YAMLPATH/templates/demo.tmpl 24 | template: true 25 | headers: 26 | Cache-Control: no-store 27 | move-nuggets-$*: 28 | pattern: /$YAMLURL/movenugget/(\d+)/(\d+) 29 | handler: FunctionHandler 30 | kwargs: 31 | function: nlg.webapp.move_nuggets 32 | headers: 33 | Cache-Control: no-store 34 | save-narrative-$*: 35 | pattern: /$YAMLURL/saveNarrative/(.*) 36 | handler: FunctionHandler 37 | kwargs: 38 | function: nlg.webapp.save_narrative 39 | nlg-default-login-$*: 40 | pattern: /$YAMLURL/login/ 41 | handler: SimpleAuth 42 | kwargs: 43 | credentials: 44 | alpha: alpha 45 | beta: beta 46 | gamma: gamma 47 | template: $YAMLPATH/login.html 48 | view-narrative-cache-$*: 49 | pattern: /$YAMLURL/narratives 50 | handler: FunctionHandler 51 | kwargs: 52 | function: nlg.webapp.get_narrative_cache 53 | xsrf_cookies: false 54 | headers: 55 | Content-Type: application/json 56 | Cache-Control: no-store 57 | narrative-download-$*: 58 | pattern: /$YAMLURL/download 59 | handler: FunctionHandler 60 | kwargs: 61 | function: nlg.webapp.download_narrative 62 | xsrf_cookies: false 63 | headers: 64 | Content-Type: application/json 65 | Content-Disposition: attachment; filename=narrative.json 66 | nlg-new-variable-tmpl-$*: 67 | pattern: /$YAMLURL/newvariable/(\d+)/(.*) 68 | handler: FunctionHandler 69 | kwargs: 70 | function: nlg.webapp.new_variable_tmpl 71 | xsrf_cookies: false 72 | headers: 73 | Content-Type: text/html 74 | nlg-new-variable-add-$*: 75 | pattern: /$YAMLURL/newvar/(\d+)/(.*) 76 | handler: FunctionHandler 77 | kwargs: 78 | function: nlg.webapp.add_new_variable 79 | xsrf_cookies: false 80 | nlg-variable-settings-$*: 81 | pattern: /$YAMLURL/variablesettings/(\d+)/(.*) 82 | handler: FunctionHandler 83 | kwargs: 84 | function: nlg.webapp.get_variable_settings_tmpl 85 | xsrf_cookies: false 86 | headers: 87 | Content-Type: text/html 88 | nlg-update-variable-$*: 89 | pattern: /$YAMLURL/updatevar/(\d+)/(.*) 90 | handler: FunctionHandler 91 | kwargs: 92 | function: nlg.webapp.set_variable_settings_tmpl 93 | xsrf_cookies: false 94 | nlg-get-nugget-$*: 95 | pattern: /$YAMLURL/nuggets/(\d+) 96 | handler: FunctionHandler 97 | kwargs: 98 | function: nlg.webapp.get_nugget 99 | xsrf_cookies: false 100 | methods: [GET, POST, DELETE] 101 | headers: 102 | Content-Type: application/json 103 | nlg-nugget-settings-$*: 104 | pattern: /$YAMLURL/nuggetsettings/(\d+) 105 | handler: FunctionHandler 106 | kwargs: 107 | function: nlg.webapp.get_nugget_settings_tmpl 108 | xsrf_cookies: false 109 | headers: 110 | Content-Type: text/html 111 | nlg-home-$*: 112 | pattern: /$YAMLURL/ 113 | handler: FileHandler 114 | kwargs: 115 | auth: 116 | login_url: $NLG_AUTH 117 | path: $YAMLPATH/index.html 118 | transform: 119 | "index.html": 120 | 
function: template 121 | nlg-condition-$*: 122 | pattern: /$YAMLURL/condition/(\d+) 123 | handler: FunctionHandler 124 | kwargs: 125 | function: nlg.webapp.add_condition 126 | xsrf_cookies: false 127 | nlg-data-selector-$*: 128 | pattern: /$YAMLURL/initform 129 | handler: FunctionHandler 130 | kwargs: 131 | headers: 132 | Cache-Control: no-store 133 | xsrf_cookies: false 134 | function: nlg.webapp.init_form 135 | redirect: 136 | query: next 137 | tablepreview-$*: 138 | pattern: /$YAMLURL/preview/(.*)/(.*) 139 | handler: FormHandler 140 | kwargs: 141 | url: $GRAMEXDATA/nlg/{_0}/{_1} 142 | headers: 143 | Cache-Control: no-store 144 | nlg-static_files-$*: 145 | pattern: /$YAMLURL/(.*) 146 | handler: FileHandler 147 | kwargs: 148 | path: $YAMLPATH 149 | nlg-config-handler-$*: 150 | pattern: /$YAMLURL/initconf 151 | handler: FunctionHandler 152 | kwargs: 153 | function: nlg.webapp.get_init_config 154 | headers: 155 | Content-Type: application/json 156 | Cache-Control: no-store 157 | textproc-$*: 158 | pattern: /$YAMLURL/textproc 159 | handler: FunctionHandler 160 | kwargs: 161 | function: nlg.webapp.process_text 162 | xsrf_cookies: false 163 | headers: 164 | Content-Type: application/json 165 | rendertmpl-$*: 166 | pattern: /$YAMLURL/render-template/(\d+)? 167 | handler: FunctionHandler 168 | kwargs: 169 | function: nlg.webapp.render_template 170 | xsrf_cookies: false 171 | headers: 172 | Content-Type: text/plain 173 | Cache-Control: no-store 174 | renderall-$*: 175 | pattern: /$YAMLURL/renderall 176 | handler: FunctionHandler 177 | kwargs: 178 | function: nlg.webapp.render_narrative 179 | headers: 180 | Content-Type: application/json 181 | Cache-Control: no-store 182 | render-live-template-$*: 183 | pattern: /$YAMLURL/render-live-template 184 | handler: FunctionHandler 185 | kwargs: 186 | function: nlg.webapp.render_live_template 187 | headers: 188 | Content-Type: text/plain 189 | -------------------------------------------------------------------------------- /nlg/app/html/demo.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | nlg-demos 8 | 9 | 10 | 11 | 12 | 13 | {% set base = '.' %} 14 | {% include template-navbar.html %} 15 | {% from web_app import read_demo_config %} 16 | {% set dsid, nrid = read_demo_config(handler) %} 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /nlg/app/html/template-navbar.html: -------------------------------------------------------------------------------- 1 | 51 | -------------------------------------------------------------------------------- /nlg/app/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | GramexNLG Template Generator 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | {% from gramex import variables %} 19 | {% set nlg_base = variables['NLG_BASE'].rstrip('/') %} 20 |
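{# nlg_base is used by the client scripts to build endpoint URLs such as ${nlg_base}/initconf (see nlg.js); stripping the trailing slash avoids double slashes in those URLs. #}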
21 | {% include "./body.html" %} 22 |
23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /nlg/app/login.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | foo Login 8 | 9 | 10 | 11 | 12 | 13 | {% set base = '..' %} 14 | {% include template-navbar.html %} 15 | {% set kwargs = handler.kwargs %} 16 | {% try %}{% set user = kwargs.user.arg %}{% except %}{% set user = 'user' %}{% end %} 17 | {% try %}{% set password = kwargs.password.arg %}{% except %}{% set password = 'password' %}{% end %} 18 |
19 |
20 | {% if error %} 21 |
22 |

Error logging in

23 |

{{ error['error'] }}

24 |
code: {{ error['code'] }}
25 |
26 | {% end %} 27 |
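<!-- Login form (assumed to follow here): the user/password field names posted come from the SimpleAuth handler's kwargs, with 'user' and 'password' as fallbacks (set in the try/except blocks above). -->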
28 |
29 | 30 | 31 |
32 |
33 | 34 | 35 |
36 | 37 |

38 | {% if kwargs.get('forgot') %} 39 |

Forgot password

40 | {% end %} 41 |
Default logins: alpha, beta or gamma (password matches the username)
42 |
43 |
44 |
45 | 46 | 47 | {% if 'hash' in kwargs.get('password', {}) %} 48 | 49 | 57 | {% end %} 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /nlg/app/nlg.js: -------------------------------------------------------------------------------- 1 | /* globals currentTemplateIndex, grammarOptions, templates, args, df, currentEventHandlers, nlg_base, g1 */ 2 | /* exported addToNarrative, setInitialConfig, checkTemplate, saveTemplate, 3 | addCondition, addName, shareNarrative, copyToClipboard, 4 | findAppliedInflections, checkSelection */ 5 | /* eslint-disable no-global-assign */ 6 | var narrative_name, dataset_name 7 | var styleparams = {bold: true, italic: false, underline: false, style: 'para'} 8 | 9 | function openDemoPage() { 10 | if (!(narrative_name)) { 11 | $('#saveModal').modal({show: true}) 12 | } else { 13 | let fh_url = encodeURIComponent($('.formhandler').attr('data-src')) 14 | let url = `${nlg_base}/demoembed?fh_url=${fh_url}&nname=${narrative_name}` 15 | window.open(url) 16 | } 17 | } 18 | 19 | function activateStyleControl() { 20 | if (styleparams.bold) { 21 | $('#boldpreview').addClass('active') 22 | } 23 | if (styleparams.italic) { 24 | $('#italicpreview').addClass('active') 25 | } 26 | if (styleparams.underline) { 27 | $('#ulinepreview').addClass('active') 28 | } 29 | 30 | if (styleparams.style == 'para') { 31 | $('#parastyle').prop('checked', true) 32 | } else { 33 | $('#liststyle').prop('checked', true) 34 | } 35 | renderByStyle() 36 | } 37 | 38 | function toggleRenderStyle(e) { 39 | if (e.currentTarget.id == "parastyle") { 40 | if ($('#parastyle').prop('checked')) { 41 | styleparams.style = "para" 42 | } 43 | } else if (e.currentTarget.id == "liststyle") { 44 | if ($('#liststyle').prop('checked')) { 45 | styleparams.style = "list" 46 | } 47 | } else if (e.currentTarget.id == "boldpreview") { 48 | if ($('#boldpreview').hasClass('active')) { 49 | styleparams.bold = true 50 | } else { 51 | styleparams.bold = false 52 | } 53 | } else if (e.currentTarget.id == "italicpreview") { 54 | if ($('#italicpreview').hasClass('active')) { 55 | styleparams.italic = true 56 | } else { 57 | styleparams.italic = false 58 | } 59 | } else if (e.currentTarget.id == "ulinepreview") { 60 | if ($('#ulinepreview').hasClass('active')) { 61 | styleparams.underline = true 62 | } else { 63 | styleparams.underline = false 64 | } 65 | } 66 | renderByStyle() 67 | } 68 | 69 | function renderByStyle() { 70 | let url = g1.url.parse(`${nlg_base}/renderall`) 71 | url.update(styleparams) 72 | $.getJSON(url.toString()).done((e) => { 73 | $(`#previewspan`).html(e.render) 74 | styleparams = e.style 75 | }) 76 | } 77 | 78 | function makeControlDroppable(elem, ondrop) { 79 | elem.on('dragover', (e) => { 80 | e.preventDefault() 81 | e.stopPropagation() 82 | }) 83 | elem.on('dragleave', (e) => { 84 | e.preventDefault() 85 | e.stopPropagation() 86 | }) 87 | elem.on('drop', (e) => { 88 | e.preventDefault() 89 | e.stopPropagation() 90 | ondrop(e) 91 | }) 92 | } 93 | 94 | function prepDrag(row) { 95 | row.on('dragstart', (e) => { 96 | e.dataTransfer = e.originalEvent.dataTransfer 97 | e.dataTransfer.setData('text', e.target.id)}) 98 | } 99 | 100 | 101 | function findControlRow(elem) { 102 | if (!(elem.id)) { 103 | return false 104 | } else { 105 | return elem.id.match(/^controlrow-\d+$/) 106 | } 107 | } 108 | 109 | function findDropPosition(y) { 110 | let rows = _.filter(Object.values($('tr[id^=controlrow]')), findControlRow) 111 | let bottoms = _.flatMap(rows, (r) => {return 
r.getBoundingClientRect().bottom}) 112 | for (let i=0; i { 201 | $('#variable-settings').html(e) 202 | } 203 | ) 204 | } else { 205 | let textSel = hasTextSelection() 206 | if (textSel) { 207 | $.get(`${nlg_base}/newvariable/${currentTemplateIndex}/${textSel.join(',')}`).done( 208 | (e) => { 209 | $('#variable-settings').html(e) 210 | } 211 | ) 212 | } 213 | } 214 | } 215 | 216 | function addToNarrative() { 217 | // Pick text from the input textarea, templatize, and add to the narrative. 218 | $.post( 219 | `${nlg_base}/textproc`, 220 | JSON.stringify({ 221 | 'args': args, 'data': df, 222 | 'text': $('#textbox').val() 223 | }), (pl) => { 224 | pl = new Template(pl) 225 | templates.push(pl) 226 | renderPreview(null) 227 | } 228 | ) 229 | } 230 | 231 | function renderPreview(fh) { 232 | // Render the preview of all current templates on the front page. 233 | if (fh) { 234 | df = fh.formdata 235 | args = g1.url.parse(g1.url.parse(window.location.href).hash).searchList 236 | refreshTemplates() 237 | return true 238 | } 239 | $('#template-preview').template({n_templates: templates.length}) 240 | makeControlDroppable($('#controltable'), handleDrop) 241 | for (let i = 0; i < templates.length; i++) { 242 | // add the remove listener 243 | var deleteListener = function () { deleteTemplate(i) } 244 | $(`#rm-btn-${i}`).on('click', deleteListener) 245 | 246 | // add setting listener 247 | var settingsListener = function () { triggerTemplateSettings(i) } 248 | $(`#settings-btn-${i}`).on('click', settingsListener) 249 | 250 | // Add the preview 251 | $.get(`${nlg_base}/render-template/${i}`).done( 252 | (e) => {$(`#preview-${i}`).html(e)} 253 | ) 254 | // prep the row for dragging 255 | prepDrag($(`#controlrow-${i}`)) 256 | } 257 | renderByStyle() 258 | } 259 | 260 | 261 | function refreshTemplate(n) { 262 | // Refresh the nth template from the backend 263 | $.getJSON(`${nlg_base}/nuggets/${n}`).done((e) => { 264 | templates[n] = new Template(e) 265 | $('#tmpl-setting-preview').html(templates[n].previewHTML()) 266 | renderPreview(null) 267 | }) 268 | } 269 | 270 | function refreshTemplates() { 271 | // Refresh the output of all templates in the current narrative. 272 | templates = [] 273 | $.getJSON(`${nlg_base}/narratives`).done((e) => { 274 | if (e.narrative.length > 0) { 275 | for (let i=0; i { 301 | $('#tmpllist').html(e) 302 | } 303 | ) 304 | } 305 | 306 | 307 | function setInitialConfig() { 308 | // At page ready, load the latest config for the authenticated user 309 | // and show it. 310 | $.getJSON(`${nlg_base}/initconf`).done((e) => { 311 | narrative_name = e.nrid 312 | refreshTemplates() 313 | Object.assign(styleparams, e.style) 314 | activateStyleControl() 315 | }) 316 | } 317 | 318 | function addName() { 319 | // Add an optional name to a template. 
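// (If the editor field is empty, the existing name is left unchanged.)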
320 | let name = $('#tmpl-name-editor').val() 321 | if (name) { 322 | templates[currentTemplateIndex].name = name 323 | } 324 | } 325 | 326 | function embedNarrative(url, nname, selector) { 327 | renderLiveNarrative(url, nname, selector) 328 | $(window).on('#?', function (e, fh_args) { 329 | let xurl = g1.url.parse(url).join(fh_args).toString() 330 | renderLiveNarrative(xurl, nname, selector) 331 | }).urlchange() 332 | } 333 | 334 | 335 | function renderLiveNarrative(url, nname, selector) { 336 | $.getJSON(url).done((e) => { 337 | $.post( 338 | `${nlg_base}/render-live-template`, 339 | JSON.stringify({ 340 | data: e, 341 | nrid: nname 342 | }), 343 | (f) => {$(selector).html(f)} 344 | ) 345 | }) 346 | } 347 | 348 | 349 | function getNarrativeEmbedCode() { 350 | // Generate embed code for this narrative. 351 | let html = ` 352 |
353 | 354 | ` 359 | return html 360 | } 361 | 362 | function generateEmbedCode() { 363 | let url = $('#embedTargetURL').val() 364 | $('#embedCodeText').text(url) 365 | } 366 | 367 | function saveNarrative(name) { 368 | narrative_name = name 369 | $.get(`${nlg_base}/saveNarrative/${name}`) 370 | } 371 | -------------------------------------------------------------------------------- /nlg/app/setup.sh: -------------------------------------------------------------------------------- 1 | pip install nlg 2 | python -m spacy download en_core_web_sm 3 | -------------------------------------------------------------------------------- /nlg/app/style.css: -------------------------------------------------------------------------------- 1 | /* UI component styles. Customize via ?bootstrap-variable=encoded-value. Example: 2 | Colors. Can be a name or a number (e.g. %23aabbcc). Preserve the hues below. 3 | primary=blue 4 | success=green 5 | info=cyan 6 | warning=orange 7 | danger=red 8 | secondary=grey 9 | light=lightgrey 10 | dark=darkgrey 11 | body-bg=white 12 | body-color=black 13 | Fonts. Can be a system font or Open+Sans, Roboto, Lato, Anton, Monserrat 14 | font-family-base=Segoe+UI 15 | headings-font-family=Segoe+UI 16 | Other 17 | https://github.com/twbs/bootstrap/blob/v4-dev/scss/_variables.scss 18 | */ 19 | @import url("ui/bootstraptheme.css?body-bg=white&navbar-dark-color=rgba(255%2C255%2C255%2C.8)&navbar-dark-hover-color=white"); 20 | /* For v4 icons, use url("ui/font-awesome/css/font-awesome.min.css") */ 21 | @import url("ui/@fortawesome/fontawesome-free/css/all.min.css"); 22 | 23 | 24 | /* custom styles for app: nlg-demos */ 25 | -------------------------------------------------------------------------------- /nlg/app/template-navbar.html: -------------------------------------------------------------------------------- 1 | 51 | -------------------------------------------------------------------------------- /nlg/app/templates/demo.tmpl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | GramexNLG Demo 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 |
15 |
16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /nlg/app/templates/new-variable.tmpl: -------------------------------------------------------------------------------- 1 |

Add new variable

2 | 3 |
5 | 6 |
7 | 8 |
9 | 10 |
11 |
12 | 13 |
14 | 15 |
16 | 17 |
18 |
19 | 20 | 21 |
22 | 23 | 36 | -------------------------------------------------------------------------------- /nlg/app/templates/template-settings.tmpl: -------------------------------------------------------------------------------- 1 | {% from tornado.escape import xhtml_unescape %} 2 |
3 |
4 |
5 |

{{ xhtml_unescape(template['previewHTML']) }}

6 |
7 |
8 |
9 |
10 | 11 | 12 |
13 | 14 |
15 | 16 | 17 |
18 |
19 |
20 |
21 | 23 |
24 |

25 | 27 | 29 |
30 | 59 | -------------------------------------------------------------------------------- /nlg/app/templates/variable-settings.tmpl: -------------------------------------------------------------------------------- 1 |
3 | 4 |
5 | 6 |
7 | 8 |
9 |
10 | 11 |
12 | 13 |
14 | 23 |
24 |
25 | 26 |
27 | 28 |
29 | 30 |
31 |
32 | 33 |
34 | 35 |
36 | 46 |
47 |
48 | 49 | 50 |
51 | 52 | 65 | -------------------------------------------------------------------------------- /nlg/grammar.py: -------------------------------------------------------------------------------- 1 | from inflect import engine 2 | from tornado.template import Template 3 | from math import floor # noqa: F401 4 | 5 | from nlg.utils import load_spacy_model, set_nlg_gramopt, get_lemmatizer 6 | 7 | infl = engine() 8 | nlp = load_spacy_model() 9 | 10 | 11 | def is_plural_noun(text): 12 | """Whether given text is a plural noun.""" 13 | doc = load_spacy_model()(text) 14 | for t in list(doc)[::-1]: 15 | if not t.is_punct: 16 | return t.tag_ in ('NNS', 'NNPS') 17 | return False 18 | 19 | 20 | is_singular_noun = lambda x: not is_plural_noun(x) # NOQA: E731 21 | 22 | 23 | @set_nlg_gramopt(source='G', fe_name='Concate Items') 24 | def concatenate_items(items, sep=', '): 25 | """Concatenate a sequence of tokens into an English string. 26 | 27 | Parameters 28 | ---------- 29 | 30 | items : list-like 31 | List / sequence of items to be printed. 32 | sep : str, optional 33 | Separator to use when generating the string 34 | 35 | Returns 36 | ------- 37 | str 38 | """ 39 | if len(items) == 0: 40 | return "" 41 | if len(items) == 1: 42 | return items[0] 43 | items = list(map(str, items)) 44 | if sep == ', ': 45 | s = sep.join(items[:-1]) 46 | s += ' and ' + items[-1] 47 | else: 48 | s = sep.join(items) 49 | return s 50 | 51 | 52 | @set_nlg_gramopt(source='G', fe_name='Pluralize') 53 | def plural(word): 54 | """Pluralize a word. 55 | 56 | Parameters 57 | ---------- 58 | 59 | word : str 60 | word to pluralize 61 | 62 | Returns 63 | ------- 64 | str 65 | Plural of `word` 66 | """ 67 | if not is_plural_noun(word): 68 | word = infl.plural(word) 69 | return word 70 | 71 | 72 | @set_nlg_gramopt(source='G', fe_name='Singularize') 73 | def singular(word): 74 | """ 75 | Singularize a word. 76 | 77 | Parameters 78 | ---------- 79 | word : str 80 | Word to singularize. 81 | 82 | Returns 83 | ------- 84 | str 85 | Singular of `word`. 86 | """ 87 | if is_plural_noun(word): 88 | word = infl.singular_noun(word) 89 | return word 90 | 91 | 92 | # @set_nlg_gramopt(source='G', fe_name='Pluralize by') 93 | def pluralize_by(word, by): 94 | """ 95 | Pluralize a word depending on another argument. 96 | 97 | Parameters 98 | ---------- 99 | word : str 100 | Word to pluralize 101 | by : any 102 | Any object checked for a pluralish value. If a sequence, it must have 103 | length greater than 1 to qualify as plural. 104 | 105 | Returns 106 | ------- 107 | str 108 | Plural or singular of `word`. 109 | """ 110 | if hasattr(by, '__iter__'): 111 | if len(by) > 1: 112 | word = plural(word) 113 | else: 114 | word = singular(word) 115 | else: 116 | if by > 1: 117 | word = plural(word) 118 | else: 119 | word = singular(word) 120 | return word 121 | 122 | 123 | # @set_nlg_gramopt(source='G', fe_name='Pluralize like') 124 | def pluralize_like(x, y): 125 | """ 126 | Pluralize a word if another is a plural. 127 | 128 | Parameters 129 | ---------- 130 | x : str 131 | The word to pluralize. 132 | y : str 133 | The word to check. 134 | 135 | Returns 136 | ------- 137 | str 138 | Plural of `x` if `y` is plural, else singular. 
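Examples
--------
A minimal sketch (illustrative only; the output depends on the
`inflect` engine and the spacy tagger):

>>> pluralize_like('actor', 'ratings')
'actors'
>>> pluralize_like('actors', 'rating')
'actor'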
139 | """ 140 | if not is_plural_noun(y): 141 | return singular(x) 142 | return plural(x) 143 | 144 | 145 | @set_nlg_gramopt(source='str', fe_name='Capitalize') 146 | def capitalize(word): 147 | return word.capitalize() 148 | 149 | 150 | @set_nlg_gramopt(source='str', fe_name='Lowercase') 151 | def lower(word): 152 | return word.lower() 153 | 154 | 155 | @set_nlg_gramopt(source='str', fe_name='Swapcase') 156 | def swapcase(word): 157 | return word.swapcase() 158 | 159 | 160 | @set_nlg_gramopt(source='str', fe_name='Title') 161 | def title(word): 162 | return word.title() 163 | 164 | 165 | @set_nlg_gramopt(source='str', fe_name='Uppercase') 166 | def upper(word): 167 | return word.upper() 168 | 169 | 170 | # @set_nlg_gramopt(source='G', fe_name='Lemmatize') 171 | def lemmatize(word, target_pos): 172 | return get_lemmatizer()(word, target_pos) 173 | 174 | 175 | def _token_inflections(x, y): 176 | """ 177 | If two words share the same root, find lexical changes required for turning 178 | one into another. 179 | 180 | Parameters 181 | ---------- 182 | x : spacy.token.Tokens 183 | y : spacy.token.Tokens 184 | 185 | Examples 186 | -------- 187 | >>> _token_inflections('language', 'Language') 188 | ['upper'] 189 | >>> _token_inflections('language', 'languages') 190 | ['plural'] 191 | """ 192 | if x.lemma_ != y.lemma_: 193 | return [] 194 | 195 | inflections = [] 196 | 197 | # check if x and y are singulars or plurals of each other. 198 | number_infl = _number_inflection(x, y) 199 | if number_infl: 200 | inflections.append(number_infl) 201 | 202 | shp_infl = _shape_inflection(x, y, prev=number_infl) 203 | if shp_infl: 204 | inflections.append(shp_infl) 205 | 206 | # Disable detecting inflections until they can be 207 | # processed without intervention. 208 | # if x.pos_ != y.pos_: 209 | # return lemmatize 210 | return inflections 211 | 212 | 213 | def _shape_inflection(x, y, prev=False): 214 | if not prev: 215 | prev = lambda x: x # noqa: E731 216 | if len(prev(x.text)) == len(y.text): 217 | for methname in ['capitalize', 'lower', 'swapcase', 'title', 'upper']: 218 | func = lambda x: getattr(x, methname)() # NOQA: E731 219 | if func(prev(x.text)) == y.text: 220 | return globals()[methname] 221 | return False 222 | 223 | 224 | def _number_inflection(x, y): 225 | if is_singular_noun(y.text): 226 | if singular(x.text).lower() == y.text.lower(): 227 | return singular 228 | elif is_plural_noun(y.text): 229 | if plural(x.text).lower() == y.text.lower(): 230 | return plural 231 | return False 232 | 233 | 234 | def find_inflections(search, fh_args, df): 235 | """ 236 | Find lexical inflections between words in input text and the search results 237 | obtained from FormHandler arguments and dataframes. 238 | 239 | Parameters 240 | ---------- 241 | search : nlg.search.DFSearchResults 242 | The DFSearchResults object corresponding to `text` and `df` 243 | fh_args : dict 244 | FormHandler arguments. 245 | df : pandas.DataFrame 246 | The source dataframe. 247 | 248 | Returns 249 | ------- 250 | dict 251 | With keys as tokens found in the dataframe or FH args, and values as 252 | list of inflections applied on them to make them closer match tokens in `text`. 
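Examples
--------
A shape sketch with hypothetical search results, in which the token
"actor" is matched by a template that renders as "actors" (so `singular`
must be applied to the rendered value to recover the token's form):

>>> find_inflections(search, fh_args, df)  # doctest: +SKIP
{actor: [singular]}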
253 | """ 254 | inflections = {} 255 | for token, tklist in search.items(): 256 | tmpl = [t['tmpl'] for t in tklist if t.get('enabled', False)][0] 257 | rendered = Template('{{{{ {} }}}}'.format(tmpl)).generate( 258 | df=df, fh_args=fh_args).decode('utf8') 259 | if rendered != token.text: 260 | x = nlp(rendered)[0] 261 | infl = _token_inflections(x, token) 262 | if infl: 263 | inflections[token] = infl 264 | return inflections 265 | 266 | 267 | def get_gramopts(): 268 | """Find all Grammar and token inflection options from the NLG library. 269 | Primarily used for creating the select box in the template settings dialog.""" 270 | funcs = {} 271 | module = globals().copy() 272 | for attrname in module: 273 | obj = module[attrname] 274 | if obj and getattr(obj, 'gramopt', False): 275 | funcs[obj.fe_name] = { 276 | 'fe_name': obj.fe_name, 'source': obj.source, 'func_name': attrname 277 | } 278 | return funcs 279 | 280 | 281 | if __name__ == "__main__": 282 | print(get_gramopts()) # noqa 283 | -------------------------------------------------------------------------------- /nlg/narrative.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | 5 | """The Narrative class.""" 6 | import json 7 | import re 8 | import warnings 9 | 10 | from spacy.tokens import Token, Span, Doc 11 | from tornado.template import Template 12 | 13 | from nlg import utils, grammar 14 | 15 | t_templatize = lambda x: '{{ ' + x + ' }}' # noqa: E731 16 | nlp = utils.load_spacy_model() 17 | 18 | 19 | def _templatizer_factory(bold, italic, underline): 20 | def templatizer(x): 21 | x = t_templatize(x) 22 | if bold: 23 | x = f"{x}" 24 | if italic: 25 | x = f"{x}" 26 | if underline: 27 | x = f"{x}" 28 | return x 29 | return templatizer 30 | 31 | 32 | def _check_unique_token(t, doc): 33 | if len([c for c in doc if c.text == t]) > 1: 34 | msg = f'There is more than one token in the document that matches the text "{t}".' \ 35 | + " Using the first match." \ 36 | + " Please use a `spacy.token.Token` instance for searching." 37 | warnings.warn(msg) 38 | 39 | 40 | class Variable(object): 41 | """ 42 | NLG Variable 43 | 44 | A variable is a piece of text which can change with the data or the operations performed on it. 45 | Each variable has two defining components: 46 | 47 | * a source text, as initially provided by the user, and 48 | * one or more *formulae*, which compute the value of the variable for a 49 | specific instance of the data. 50 | 51 | The source text of a variable may be found in multiple places within a dataset, and as such, 52 | a variable may have multiple formulae - one of which will have to be preferred by the user. 53 | A variable may additionally have other attributes, like: 54 | 55 | * a set of linguistic inflections which determine the form of the rendered variable text - 56 | these are distinct from the formula itself, in that the formula creates the base form 57 | of the text and inflections modify the base form. 
58 | * a *name* used to identify the variable within the template of the nugget 59 | """ 60 | 61 | def __init__(self, token, sources=None, varname='', inflections=None): 62 | self._token = token 63 | if sources is None: 64 | sources = [] 65 | self.sources = sources 66 | self.varname = varname 67 | if inflections is None: 68 | inflections = [] 69 | self.inflections = inflections 70 | self.templatizer = t_templatize 71 | 72 | def to_dict(self): 73 | """Serialize the variable to dict.""" 74 | payload = {'text': self._token.text} 75 | token = self._token 76 | if isinstance(token, Token): 77 | payload['index'] = token.i 78 | payload['idx'] = token.idx 79 | elif isinstance(token, Span): 80 | payload['index'] = token.start, token.end 81 | payload['idx'] = token[0].idx 82 | elif isinstance(token, Doc): 83 | payload['index'] = 0 84 | payload['idx'] = 0 85 | payload['sources'] = self.sources 86 | payload['varname'] = self.varname 87 | payload['inflections'] = self.inflections 88 | return payload 89 | 90 | def set_expr(self, expr): 91 | """Change the formula or expression for the variable. 92 | 93 | Parameters 94 | ---------- 95 | expr : str 96 | Python expression used to determine the value of the variable. 97 | """ 98 | tmpl = self.enabled_source 99 | tmpl['tmpl'] = expr 100 | 101 | @property 102 | def enabled_source(self): 103 | for tmpl in self.sources: 104 | if tmpl.get('enabled', False): 105 | return tmpl 106 | 107 | def enable_source(self, tmpl): 108 | if isinstance(tmpl, int): 109 | for source in self.sources: 110 | source['enabled'] = False 111 | self.sources[tmpl]['enabled'] = True 112 | elif tmpl in [c['tmpl'] for c in self.sources]: 113 | for source in self.sources: 114 | if source['tmpl'] == tmpl: 115 | source['enabled'] = True 116 | else: 117 | source['enabled'] = False 118 | else: 119 | raise ValueError('Variable source not found.') 120 | 121 | @property 122 | def template(self): 123 | tmpl = self.enabled_source 124 | tmplstr = tmpl['tmpl'] 125 | 126 | for i in self.inflections: 127 | tmplstr = self._add_inflection(tmplstr, i) 128 | 129 | varname = tmpl.get('varname', '') 130 | if varname: 131 | return tmplstr 132 | 133 | return self.templatizer(tmplstr) 134 | 135 | def _add_inflection(self, tmplstr, infl): 136 | func = infl['func_name'] 137 | source = infl['source'] 138 | if source == 'str': 139 | tmplstr += f'.{func}()' 140 | else: 141 | tmplstr = f'{source}.{func}({tmplstr})' 142 | return tmplstr 143 | 144 | def __repr__(self): 145 | return self.template 146 | 147 | 148 | class Nugget(object): 149 | """ 150 | Gramex-NLG Nugget 151 | 152 | A nugget is ideally a single sentence which conveys an insight about the data. 153 | It is created by searching the source dataframe and operations performed on it 154 | for entities found in the input text. 155 | 156 | Note: This class is not meant to be instantiated directly. Please use `nlg.templatize`. 
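Example
-------
A sketch of the usual entry point (mirrors the doctests elsewhere in
this module, using the bundled actors.csv):

>>> from nlg import templatize
>>> df = pd.read_csv('actors.csv')
>>> text = nlp('Spencer Tracy has 192 votes.')
>>> nugget = templatize(text, {}, df)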
157 | """ 158 | 159 | def __init__(self, text, tokenmap=None, inflections=None, fh_args=None, 160 | condition=None, template="", name=""): 161 | self.doc = text 162 | self.tokenmap = {} 163 | if inflections is None: 164 | inflections = {} 165 | if tokenmap is not None: 166 | for tk, tkobj in tokenmap.items(): 167 | if isinstance(tkobj, Variable): 168 | token = tkobj 169 | elif isinstance(tkobj, list): 170 | token = Variable(tk, tkobj, inflections=inflections.get(tk)) 171 | self.tokenmap[tk] = token 172 | if fh_args is not None: 173 | self.fh_args = fh_args 174 | else: 175 | self.fh_args = {} 176 | self._template = template 177 | self.condition = condition 178 | self.name = name 179 | self.templatizer = t_templatize 180 | 181 | def to_dict(self): 182 | """Serialze the nugget to dict.""" 183 | payload = {} 184 | payload['text'] = self.doc.text 185 | tokenmap = [] 186 | for _, variable in self.tokenmap.items(): 187 | tokenmap.append(variable.to_dict()) 188 | payload['tokenmap'] = tokenmap 189 | payload['fh_args'] = self.fh_args 190 | payload['condition'] = self.condition 191 | payload['name'] = self.name 192 | payload['template'] = self.template 193 | return payload 194 | 195 | @classmethod 196 | def from_json(cls, obj): 197 | if isinstance(obj, str): 198 | obj = json.loads(obj) 199 | 200 | text = obj.pop('text') 201 | obj['text'] = nlp(text) 202 | 203 | tokenlist = obj.pop('tokenmap') 204 | tokenmap = {} 205 | for tk in tokenlist: 206 | index = tk.pop('index') 207 | if isinstance(index, int): 208 | token = obj['text'][index] 209 | elif isinstance(index, (list, tuple)): 210 | start, end = index 211 | token = obj['text'][start:end] 212 | tk.pop('idx') 213 | tk.pop('text') 214 | tokenmap[token] = Variable(token, **tk) 215 | obj['tokenmap'] = tokenmap 216 | 217 | return cls(**obj) 218 | 219 | @property 220 | def variables(self): 221 | return self.tokenmap 222 | 223 | def get_var(self, t): 224 | """Get a variable from the nugget. 225 | 226 | Parameters 227 | ---------- 228 | t : any 229 | The string, or token corresponding to the variable. 230 | Using strings is discouraged, since the nugget may have 231 | more than one variable which renders to the same string form. 232 | Using spacy tokens is unambiguous. 
233 | 234 | Returns 235 | ------- 236 | nlg.narrative.Variable 237 | 238 | Example 239 | ------- 240 | >>> from nlg import templatize 241 | >>> df = pd.read_csv('actors.csv') 242 | >>> text = nlp("Charlie Chaplin has 76 votes.") 243 | >>> nugget = templatize(text, {}, df) 244 | >>> nugget.get_var('Charlie Chaplin') 245 | {{ df["name"].iloc[-1] }} 246 | """ 247 | if len(self.tokenmap) == 1: 248 | token, var = tuple(self.tokenmap.items())[0] 249 | if isinstance(token, Doc): 250 | variable = var 251 | elif isinstance(t, Token): 252 | variable = self.tokenmap.get(t, False) 253 | elif isinstance(t, str): 254 | _check_unique_token(t, self.doc) 255 | variable = False 256 | for token in self.doc: 257 | if token.text == t: 258 | variable = self.tokenmap.get(token, False) 259 | else: 260 | if isinstance(t, int): 261 | token = self.doc[t] 262 | elif isinstance(t, (list, tuple)): 263 | start, end = t 264 | token = self.doc[start:end] 265 | variable = self.tokenmap.get(token, False) 266 | if variable: 267 | return variable 268 | raise KeyError('Variable not found.') 269 | 270 | @property 271 | def template(self): 272 | sent = self.doc.text 273 | for tk, tkobj in self.tokenmap.items(): 274 | tmpl = tkobj.template 275 | sent = sent.replace(tk.text, tmpl) 276 | if tkobj.varname: 277 | pattern = re.escape(tmpl) 278 | sent = re.sub(pattern, self.templatizer(tkobj.varname), sent) 279 | sent = f'{{% set {tkobj.varname} = {tmpl} %}}\n' + sent 280 | if self.condition: 281 | sent = f'{{% if {self.condition} %}}\n' + sent + '\n{% end %}' 282 | return self.add_fh_args(sent) 283 | 284 | def _set_templatizer(self, func): 285 | self.templatizer = func 286 | for _, variable in self.tokenmap.items(): 287 | variable.templatizer = self.templatizer 288 | 289 | def _reset_templatizer(self): 290 | self._set_templatizer(t_templatize) 291 | 292 | def to_html(self, bold=True, italic=False, underline=False, **kwargs): 293 | self._set_templatizer(_templatizer_factory(bold, italic, underline)) 294 | try: 295 | s = self.render(**kwargs) 296 | return s 297 | finally: 298 | self._reset_templatizer() 299 | 300 | def __repr__(self): 301 | return self.template 302 | 303 | def render(self, df, fh_args=None, **kwargs): 304 | """Render the template for the given set of arguments. 305 | 306 | Parameters 307 | ---------- 308 | df : pandas.DataFrame 309 | The dataframe to use in the new rendering. 310 | 311 | fh_args : dict 312 | FormHandler arguments to use to transform the dataframe. 313 | 314 | **kwargs : dict 315 | Arguments passed to the `tornado.template.Template.generate` method. 316 | 317 | Returns 318 | ------- 319 | str 320 | Rendered string. 
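Note that tornado's `Template.generate` returns `bytes` rather than `str`
(hence the `b'...'` output in the example below); callers such as
`Narrative.render` decode it with UTF-8.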
321 | 322 | Example 323 | ------- 324 | >>> from nlg import templatize 325 | >>> df = pd.read_csv('actors.csv') 326 | >>> text = nlp("Humphrey Bogart is at the top of the list.") 327 | >>> nugget = templatize(text, {}, df) 328 | >>> nugget.render(df.iloc[1:]) 329 | b'Cary Grant is at the top of the list' 330 | """ 331 | if fh_args is not None: 332 | self.fh_args = fh_args 333 | else: 334 | fh_args = {} 335 | kwargs['fh_args'] = fh_args 336 | return Template( 337 | self.template, whitespace='oneline').generate( 338 | df=df, orgdf=df, U=utils, G=grammar, **kwargs) 339 | 340 | def add_fh_args(self, sent): 341 | if self.fh_args: 342 | fh_args = json.dumps(self.fh_args) 343 | tmpl = f'{{% set fh_args = {fh_args} %}}\n' 344 | tmpl += f'{{% set df = U.gfilter(orgdf, fh_args.copy()) %}}\n' 345 | tmpl += f'{{% set fh_args = U.sanitize_fh_args(fh_args, orgdf) %}}\n' 346 | tmpl += '{# Do not edit above this line. #}\n' 347 | return tmpl + sent 348 | return sent 349 | 350 | def add_var(self, token, varname='', expr=''): 351 | """Set a token within the source document as a variable. 352 | 353 | Parameters 354 | ---------- 355 | token : int or spacy.tokens.Token or spacy.tokens.Span 356 | If `token` is an integer, it is interpreted as the position of the token 357 | in the source document. 358 | 359 | varname : str, optional 360 | Optional variable name used to refer to the variable within the Tornado template. 361 | 362 | expr : str, optional 363 | Python expression used to determine the value of the variable. 364 | Note that if `expr` is not provided, it has to be passed at the time of rendering the 365 | template. (See the `nlg.narrative.Nugget.render` method) 366 | 367 | Example 368 | ------- 369 | >>> from nlg import templatize 370 | >>> df = pd.read_csv('actors.csv') 371 | >>> fh_args = {'_sort': ['-rating']} 372 | >>> text = nlp("James Stewart is the actor with the highest rating.") 373 | >>> nugget = templatize(text, fh_args, df) 374 | >>> nugget.add_var(-2, 'sort_col', 'fh_args["_sort"][0]') 375 | """ 376 | if not (varname or expr): 377 | raise ValueError('One of `varname` or `expr` must be provided.') 378 | if isinstance(token, int): 379 | token = self.doc[token] 380 | elif isinstance(token, (list, tuple)): 381 | token = self.doc.char_span(*token) 382 | try: 383 | if any([token in c for c in self.tokenmap if isinstance(c, (Span, Doc))]): 384 | raise ValueError('Token is already contained in another variable.') 385 | except TypeError: 386 | pass 387 | source = [{'tmpl': expr, 'type': 'user', 'enabled': True}] 388 | self.tokenmap[token] = Variable(token, sources=source, varname=varname) 389 | 390 | 391 | class Narrative(list): 392 | """A list to hold only Nuggets.""" 393 | 394 | default_style = dict(style='para', liststyle='html', bold=True, italic=False, underline=False) 395 | 396 | def render(self, sep=' ', **kwargs): 397 | return sep.join([c.render(**kwargs).decode('utf8') for c in self]) 398 | 399 | def to_html(self, style='para', liststyle='html', bold=True, italic=False, underline=False, 400 | **kwargs): 401 | self.html_style = { 402 | 'bold': bold, 'italic': italic, 'underline': underline, 403 | 'style': style, 'liststyle': liststyle 404 | } 405 | rendered = [c.to_html(bold, italic, underline, **kwargs).decode('utf8') for c in self] 406 | if style == 'para': 407 | s = ' '.join(rendered) 408 | elif style == 'list': 409 | if liststyle == "html": 410 | l_render = "".join(["
<li>{}</li>".format(r) for r in rendered]) 411 | s = f"<ul>{l_render}</ul>
    " 412 | elif liststyle == 'markdown': 413 | s = "\n".join(["* " + r for r in rendered]) 414 | else: 415 | raise ValueError('Unknown liststyle.') 416 | return s 417 | 418 | def move(self, x, y): 419 | raise NotImplementedError 420 | 421 | def to_dict(self): 422 | return {'narrative': [c.to_dict() for c in self], 423 | 'style': getattr(self, 'html_style', self.default_style)} 424 | 425 | @classmethod 426 | def from_json(cls, obj): 427 | narrative = cls() 428 | for nugget in obj['narrative']: 429 | narrative.append(Nugget.from_json(nugget)) 430 | narrative.html_style = obj['style'] 431 | return narrative 432 | -------------------------------------------------------------------------------- /nlg/search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim:fenc=utf-8 3 | 4 | """ 5 | Search tools. 6 | """ 7 | 8 | from itertools import chain 9 | import warnings 10 | 11 | import numpy as np 12 | import pandas as pd 13 | from tornado.template import Template 14 | 15 | from nlg import grammar 16 | from nlg import narrative 17 | from nlg import utils 18 | 19 | SEARCH_PRIORITIES = [ 20 | # {'type': 'doc'}, 21 | {'type': 'ne'}, # A match which is a named entity gets the highest priority 22 | {'location': 'fh_args'}, # than one that is a formhandler arg 23 | {'location': 'colname'}, # than one that is a column name 24 | {'type': 'quant'}, # etc 25 | {'location': 'cell'} 26 | ] 27 | 28 | 29 | def _sort_search_results(items, priorities=SEARCH_PRIORITIES): 30 | """ 31 | Sort a list of search results by `priorities`. 32 | 33 | Parameters 34 | ---------- 35 | items : dict 36 | Dictionary containing search results, where keys are tokens and values 37 | are lists of locations where the token was found. Preferably this should 38 | be a `DFSearchResults` object. 39 | priorities : list, optional 40 | List of rules that allow sorting of search results. A `rule` is any 41 | subset of a search result dictionary. Lower indices indicate higher priorities. 42 | 43 | Returns 44 | ------- 45 | dict 46 | Prioritized search results - for each {token: search_matches} pair, sort 47 | search_matches such that a higher priority search result is enabled. 48 | """ 49 | if len(items) > 1: 50 | match_ix = [[p.items() <= item.items() for p in priorities] for item in items] 51 | min_match = [m.index(True) for m in match_ix] 52 | items[min_match.index(min(min_match))]['enabled'] = True 53 | else: 54 | items[0]['enabled'] = True 55 | return items 56 | 57 | 58 | def _preprocess_array_search(text, array, literal=False, case=False, lemmatize=True, 59 | nround=False): 60 | nlp = utils.load_spacy_model() 61 | if case or nround: 62 | raise NotImplementedError 63 | 64 | if literal and lemmatize: 65 | warnings.warn('Ignoring lemmatization.') 66 | 67 | if not (literal or lemmatize): 68 | warnings.warn( 69 | 'One of `literal` or `lemmatize` must be True. 
Falling back to lemmatize=True') 70 | literal, lemmatize = False, True 71 | 72 | if literal: # ignore every other flag else 73 | tokens = pd.Series([c.text for c in text], index=text) 74 | 75 | elif lemmatize: 76 | tokens = pd.Series([c.lemma_ for c in text], index=text) 77 | if array.ndim == 1: 78 | array = array.map(nlp) 79 | array = pd.Series([token.lemma_ for doc in array for token in doc]) 80 | elif array.ndim == 2: 81 | for col in array.columns[array.dtypes == np.dtype('O')]: 82 | s = [c if isinstance(c, str) else str(c) for c in array[col]] 83 | s = [nlp(c) for c in s] 84 | try: 85 | array[col] = [token.lemma_ for doc in s for token in doc] 86 | except ValueError: 87 | warnings.warn('Cannot lemmatize multi-word cells.') 88 | if not case: # still need to respect the `case` param 89 | array[col] = array[col].str.lower() 90 | 91 | return tokens, array 92 | 93 | 94 | def _remerge_span_tuples(results): 95 | # re-merge span objects that end up as tuples; see issue #25 96 | unmerged_spans = [k for k in results if isinstance(k, tuple)] 97 | for span in unmerged_spans: 98 | start, end = span[0].idx, span[-1].idx + len(span[-1]) 99 | new_span = span[0].doc.char_span(start, end) 100 | results[new_span] = results.pop(span) 101 | return results 102 | 103 | 104 | def _text_search_array(text, array, case=False): 105 | array = array.astype(str) 106 | if not case: 107 | stext = text.lower() 108 | if array.ndim == 1: 109 | array = array.map(lambda x: x.lower()) 110 | elif array.ndim == 2: 111 | for col in array: 112 | array[col] = array[col].str.lower() 113 | else: 114 | stext = text 115 | mask = array == stext 116 | if not mask.any(axis=None): 117 | return [] 118 | indices = mask.values.nonzero() 119 | if array.ndim == 1: 120 | return indices[0] 121 | if array.ndim == 2: 122 | return indices 123 | 124 | 125 | def _search_1d_array(text, array, literal=False, case=False, lemmatize=True, 126 | nround=False): 127 | tokens, array = _preprocess_array_search(text, array, literal, case, lemmatize, nround) 128 | mask = array.isin(tokens) 129 | if not mask.any(): 130 | return {} 131 | if isinstance(mask, pd.Series): 132 | nz = mask.to_numpy().nonzero()[0] 133 | else: 134 | nz = mask.nonzero()[0] 135 | indices = {array[i]: i for i in nz} 136 | tk = tokens[tokens.isin(array)] 137 | return _remerge_span_tuples({token: indices[s] for token, s in tk.items()}) 138 | 139 | 140 | def _search_2d_array(text, array, literal=False, case=False, lemmatize=True, nround=False): 141 | array = array.astype(str) 142 | tokens, array = _preprocess_array_search(text, array, literal, case, lemmatize, nround) 143 | mask = array.isin(tokens.values) 144 | if not mask.any().any(): 145 | return {} 146 | indices = {array.iloc[i, j]: (i, j) for i, j in zip(*mask.values.nonzero())} 147 | tk = tokens[tokens.isin(array.values.ravel())] 148 | return _remerge_span_tuples({token: indices[s] for token, s in tk.items()}) 149 | 150 | 151 | def _df_maxlen(df): 152 | # Find the length of the longest string present in the columns, indices or values of a df 153 | col_max = max([len(c) for c in df.columns.astype(str)]) 154 | ix_max = max([len(c) for c in df.index.astype(str)]) 155 | array_max = max([df[c].astype(str).apply(len).max() for c in df]) 156 | return max(col_max, ix_max, array_max) 157 | 158 | 159 | # TODO: Can this be done with defaultdict? 160 | class DFSearchResults(dict): 161 | """A convenience wrapper around `dict` to collect search results. 
162 | 163 | Different from `dict` in that values are always lists, and setting to 164 | existing key appends to the list. 165 | """ 166 | 167 | def __setitem__(self, key, value): 168 | if key not in self: 169 | super(DFSearchResults, self).__setitem__(key, [value]) 170 | elif self[key][0] != value: 171 | self[key].append(value) 172 | 173 | def update(self, other): 174 | # Needed because the default update method doesn't seem to use setitem 175 | for k, v in other.items(): 176 | self[k] = v 177 | 178 | def clean(self): 179 | """Sort the search results for each token by priority and un-overlap tokens.""" 180 | for k, v in self.items(): 181 | _sort_search_results(v) 182 | # unoverlap the keys 183 | to_remove = [] 184 | for k in self: 185 | to_search = self.keys() - {k} 186 | if utils.is_overlap(k, to_search): 187 | to_remove.append(k) 188 | for i in to_remove: 189 | del self[i] 190 | 191 | 192 | class DFSearch(object): 193 | """Make a dataframe searchable.""" 194 | 195 | def __init__(self, df, nlp=None, **kwargs): 196 | """Default constrictor. 197 | 198 | Parameters 199 | ---------- 200 | df : pd.DataFrame 201 | The dataframe to search. 202 | nlp : A `spacy.lang` model, optional 203 | """ 204 | self.df = df 205 | # What do results contain? 206 | # A map of tokens to list of search results. 207 | self.results = DFSearchResults() 208 | if not nlp: 209 | nlp = utils.load_spacy_model() 210 | self.matcher = kwargs.get('matcher', utils.make_np_matcher(nlp)) 211 | self.ents = [] 212 | 213 | def search(self, text, colname_fmt='df.columns[{}]', 214 | cell_fmt='df["{}"].iloc[{}]', **kwargs): 215 | """ 216 | Search the dataframe. 217 | 218 | Parameters 219 | ---------- 220 | text : spacy.Doc 221 | The text to search. 222 | colname_fmt : str, optional 223 | String format to describe dataframe columns in the search results, 224 | can be one of 'df.columns[{}]' or 'df[{}]'. 225 | cell_fmt : str, optional 226 | String format to describe dataframe values in the search results. 227 | Can be one of 'df.iloc[{}, {}]', 'df.loc[{}, {}]', 'df[{}][{}]', etc. 228 | 229 | Returns 230 | ------- 231 | dict 232 | A dictionary who's keys are tokens from `text` found in 233 | the source dataframe, and values are a list of locations in the df 234 | where they are found. 
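Example
-------
A sketch (assuming `nlp` is the loaded spacy model and `df` holds the
bundled actors.csv, which has a "rating" column):

>>> dfs = DFSearch(df)
>>> dfs.search(nlp('the actor with the highest rating'))  # doctest: +SKIP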
235 | """ 236 | self.search_nes(text) 237 | if len(text.text) <= _df_maxlen(self.df): 238 | for i in _text_search_array(text.text, self.df.columns): 239 | self.results[text] = {'location': 'colname', 'tmpl': colname_fmt.format(i), 240 | 'type': 'doc'} 241 | for x, y in zip(*_text_search_array(text.text, self.df)): 242 | x = utils.sanitize_indices(self.df.shape, x, 0) 243 | y = utils.sanitize_indices(self.df.shape, y, 1) 244 | self.results[text] = { 245 | 'location': 'cell', 'tmpl': cell_fmt.format(self.df.columns[y], x), 246 | 'type': 'doc'} 247 | 248 | else: 249 | for token, ix in self.search_columns(text, **kwargs).items(): 250 | ix = utils.sanitize_indices(self.df.shape, ix, 1) 251 | self.results[token] = {'location': 'colname', 'tmpl': colname_fmt.format(ix), 252 | 'type': 'token'} 253 | 254 | for token, (x, y) in self.search_table(text, **kwargs).items(): 255 | x = utils.sanitize_indices(self.df.shape, x, 0) 256 | y = utils.sanitize_indices(self.df.shape, y, 1) 257 | self.results[token] = { 258 | 'location': 'cell', 'tmpl': cell_fmt.format(self.df.columns[y], x), 259 | 'type': 'token'} 260 | self.search_quant([c for c in text if c.pos_ == 'NUM']) 261 | # self.search_derived_quant([c.text for c in selfdoc if c.pos_ == 'NUM']) 262 | 263 | return self.results 264 | 265 | def search_nes(self, doc, colname_fmt='df.columns[{}]', cell_fmt='df["{}"].iloc[{}]'): 266 | """Find named entities in text, and search for them in the dataframe. 267 | 268 | Parameters 269 | ---------- 270 | text : str 271 | The text to search. 272 | """ 273 | self.ents = utils.ner(doc, self.matcher) 274 | for token, ix in self.search_columns(self.ents, literal=True).items(): 275 | ix = utils.sanitize_indices(self.df.shape, ix, 1) 276 | self.results[token] = { 277 | 'location': 'colname', 278 | 'tmpl': colname_fmt.format(ix), 'type': 'ne' 279 | } 280 | for token, (x, y) in self.search_table(self.ents, literal=True).items(): 281 | x = utils.sanitize_indices(self.df.shape, x, 0) 282 | y = utils.sanitize_indices(self.df.shape, y, 1) 283 | self.results[token] = { 284 | 'location': 'cell', 285 | 'tmpl': cell_fmt.format(self.df.columns[y], x), 'type': 'ne'} 286 | 287 | def search_table(self, text, **kwargs): 288 | """Search the `.values` attribute of the dataframe for tokens in `text`.""" 289 | kwargs['array'] = self.df.copy() 290 | return self._search_array(text, **kwargs) 291 | 292 | def search_columns(self, text, **kwargs): 293 | """Search df columns for tokens in `text`.""" 294 | kwargs['array'] = self.df.columns 295 | return self._search_array(text, **kwargs) 296 | 297 | def search_quant(self, quants, nround=2, cell_fmt='df["{}"].iloc[{}]'): 298 | """Search the dataframe for a set of quantitative values. 299 | 300 | Parameters 301 | ---------- 302 | quants : list / array like 303 | The values to search. 304 | nround : int, optional 305 | Numeric values in the dataframe are rounded to these many 306 | significant digits before searching. 
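Example
-------
A sketch (`quants` is expected to hold spacy tokens tagged as numbers,
which is how `search` collects them; 109 appears in the bundled
actors.csv):

>>> dfs.search_quant([t for t in nlp('has 109 votes') if t.pos_ == 'NUM'])  # doctest: +SKIP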
307 | """ 308 | dfclean = utils.sanitize_df(self.df, nround) 309 | qarray = np.array([c.text for c in quants]) 310 | quants = np.array(quants) 311 | n_quant = qarray.astype('float').round(nround) 312 | for x, y in zip(*dfclean.isin(n_quant).values.nonzero()): 313 | x = utils.sanitize_indices(dfclean.shape, x, 0) 314 | y = utils.sanitize_indices(dfclean.shape, y, 1) 315 | tk = quants[n_quant == dfclean.iloc[x, y]][0] 316 | self.results[tk] = { 317 | 'location': 'cell', 'tmpl': cell_fmt.format(self.df.columns[y], x), 318 | 'type': 'quant'} 319 | 320 | def search_derived_quant(self, quants, nround=2): 321 | """Search the common derived dataframe parameters for a set of quantitative values. 322 | 323 | Parameters 324 | ---------- 325 | quants : list / array like 326 | The values to search. 327 | nround : int, optional 328 | Numeric values in the dataframe are rounded to these many 329 | significant digits before searching. 330 | """ 331 | dfclean = utils.sanitize_df(self.df, nround) 332 | quants = np.array(quants) 333 | # n_quant = quants.astype('float').round(2) 334 | 335 | for num in quants: 336 | if int(num) == len(dfclean): 337 | self.results[num] = { 338 | 'location': 'cell', 'tmpl': "len(df)", 339 | 'type': 'quant'} 340 | 341 | def _search_array(self, text, array, literal=False, 342 | case=False, lemmatize=True, nround=False): 343 | """Search for tokens in text within an array. 344 | 345 | Parameters 346 | ---------- 347 | text : str or spacy document 348 | Text to search 349 | array : array-like 350 | Array to search in. 351 | literal : bool, optional 352 | Whether to match tokens to values literally. 353 | case : bool, optional 354 | If true, run a case sensitive search. 355 | lemmatize : bool, optional 356 | If true (default), search on lemmas of tokens and values. 357 | nround : int, optional 358 | Significant digits used to round `array` before searching. 359 | 360 | Returns 361 | ------- 362 | dict 363 | Mapping of tokens to a sequence of indices within `array`. 
364 | 365 | Example 366 | ------- 367 | >>> _search_array('3', np.arange(5)) 368 | {'3': [3]} 369 | >>> df = pd.DataFrame(np.eye(3), columns='one punch man'.split()) 370 | >>> _search_array('1', df.values) 371 | {'1': [(0, 0), (1, 1), (2, 2)]} 372 | >>> _search_array('punched man', df.columns) 373 | {'punched': [1], 'man': [2]} 374 | >>> _search_array('1 2 buckle my shoe', df.index) 375 | {'1': [1], '2': [2]} 376 | """ 377 | if array.ndim == 1: 378 | func = _search_1d_array 379 | else: 380 | func = _search_2d_array 381 | return func(text, array, literal, case, lemmatize, nround) 382 | # if len(res) == 0: # Fall back on searching the whole string, not just the entities 383 | # res = func([text], array, literal, case, lemmatize, nround) 384 | # return res 385 | 386 | 387 | def _search_fh_args(entities, args, key, lemmatized): 388 | colnames = args.get(key, False) 389 | if not colnames: 390 | return {} 391 | nlp = utils.load_spacy_model() 392 | argtokens = list(chain(*[nlp(c) for c in colnames])) 393 | res = {} 394 | for i, token in enumerate(argtokens): 395 | for ent in entities: 396 | if lemmatized and (token.lemma_ == ent.lemma_): 397 | match = True 398 | elif token.text == ent.text: 399 | match = True 400 | else: 401 | match = False 402 | if match: 403 | res[ent] = { 404 | 'type': 'token', 'tmpl': f"fh_args['{key}'][{i}]", 405 | 'location': 'fh_args' 406 | } 407 | return res 408 | 409 | 410 | def _search_groupby(entities, args, lemmatized=True): 411 | return _search_fh_args(entities, args, key='_by', lemmatized=lemmatized) 412 | 413 | 414 | def _search_sort(entities, args, lemmatized=True): 415 | return _search_fh_args(entities, args, key='_sort', lemmatized=lemmatized) 416 | 417 | 418 | def _search_select(entities, args, lemmatized=True): 419 | return _search_fh_args(entities, args, key='_c', lemmatized=lemmatized) 420 | 421 | 422 | def search_args(entities, args, lemmatized=True, fmt='fh_args["{}"][{}]', 423 | argkeys=('_sort', '_by', '_c')): 424 | """ 425 | Search formhandler arguments provided as URL query parameters. 426 | 427 | Parameters 428 | ---------- 429 | entities : list 430 | list of named entities found in the source text 431 | args : dict 432 | FormHandler args as parsed by g1.url.parse(...).searchList 433 | lemmatized : bool, optional 434 | whether to search on lemmas of text values 435 | fmt : str, optional 436 | String format used to describe FormHandler arguments in the template 437 | argkeys : list, optional 438 | Formhandler argument keys to be considered for the search. Any key not 439 | present in this will be ignored. 440 | # TODO: Column names can be keys too!! 441 | 442 | Returns 443 | ------- 444 | dict 445 | Mapping of entities / tokens to objects describing where they are found 446 | in Formhandler arguemnts. 
Each search result object has the following 447 | structure: 448 | { 449 | 'type': 'some token', 450 | 'location': 'fh_args', 451 | 'tmpl': 'fh_args['_by'][0]' # The template that gets this token from fh_args 452 | } 453 | """ 454 | args = {k: v for k, v in args.items() if k in argkeys} 455 | search_res = {} 456 | entities = list(chain(*entities)) 457 | search_res.update(_search_groupby(entities, args, lemmatized=lemmatized)) 458 | search_res.update(_search_sort(entities, args, lemmatized=lemmatized)) 459 | search_res.update(_search_select(entities, args, lemmatized=lemmatized)) 460 | return search_res 461 | 462 | 463 | def _search(text, args, df, copy=False): 464 | """Construct a tornado template which regenerates some 465 | text from a dataframe and formhandler arguments. 466 | 467 | The pipeline consists of: 468 | 1. cleaning the text and the dataframe 469 | 2. searching the dataframe and FH args for tokens in the text 470 | 3. detecting inflections on the tokens. 471 | 472 | Parameters 473 | ---------- 474 | text : spacy.Doc 475 | Input text 476 | args : dict 477 | Formhandler arguments 478 | df : pd.DataFrame 479 | Source dataframe. 480 | 481 | Returns 482 | -------- 483 | tuple 484 | of search results, cleaned text and token inflections. The webapp uses 485 | these to construct a tornado template. 486 | """ 487 | # utils.load_spacy_model() 488 | if copy: 489 | df = df.copy() 490 | df = utils.gfilter(df, args.copy()) 491 | # Do this only if needed: 492 | # clean_text = utils.sanitize_text(text.text) 493 | args = utils.sanitize_fh_args(args, df) 494 | # Is this correct? 495 | dfs = DFSearch(df) 496 | dfix = dfs.search(text) 497 | dfix.update(search_args(dfs.ents, args)) 498 | dfix.clean() 499 | inflections = grammar.find_inflections(dfix, args, df) 500 | _infl = {} 501 | for token, funcs in inflections.items(): 502 | _infl[token] = [] 503 | for func in funcs: 504 | _infl[token].append({ 505 | 'source': func.source, 506 | 'fe_name': func.fe_name, 507 | 'func_name': func.__name__ 508 | }) 509 | # FIXME: Why return text if it's unchanged? 510 | return dfix, text, _infl 511 | 512 | 513 | def _make_inflection_string(tmpl, infl): 514 | source = infl['source'] 515 | func_name = infl['func_name'] 516 | if source == 'str': 517 | tmpl += f'.{func_name}()' 518 | else: 519 | tmpl = f'{source}.{func_name}({tmpl})' 520 | return tmpl 521 | 522 | 523 | def templatize_token(token, results, inflection): 524 | for r in results: 525 | if r.get('enabled', False): 526 | break 527 | tmpl = r['tmpl'] 528 | if inflection: 529 | for i in inflection: 530 | tmpl = _make_inflection_string(tmpl, i) 531 | return narrative.t_templatize(tmpl) 532 | 533 | 534 | def templatize(text, args, df): 535 | """Construct an NLG Nugget which templatizes the given text in 536 | the context of a dataframe, and FormHandler operations on it. 537 | 538 | Parameters 539 | ---------- 540 | text : spacy.tokens.Doc 541 | Input document 542 | args : dict 543 | Formhandler arguments 544 | df : pd.DataFrame 545 | Source dataframe. 546 | 547 | Returns 548 | ------- 549 | nlg.narrative.Nugget 550 | An NLG Nugget object containing the template for the input text. 551 | 552 | Example 553 | ------- 554 | >>> from gramex import data 555 | >>> from nlg.utils import load_spacy_model 556 | >>> df = pd.read_csv('iris.csv') 557 | >>> fh_args = {'_by': ['species']} 558 | >>> df = data.filter(df, fh_args.copy()) 559 | >>> nlp = load_spacy_model() 560 | >>> text = 'The iris dataset has 3 species - setosa, versicolor and virginica.' 
561 | >>> nugget = templatize(text, fh_args, df) 562 | >>> print(template) 563 | {% set fh_args = {"_by": ["species"]} %} 564 | {% set df = U.gfilter(orgdf, fh_args.copy()) %} 565 | The iris dataset has 3 {{ df.columns[0] }} - {{ df["species"].iloc[0] }}, \ 566 | {{ df["species"].iloc[1] }} and {{ df["species"].iloc[-1] }}. 567 | """ 568 | dfix, clean_text, infl = _search(text, args, df) 569 | return narrative.Nugget(clean_text, dfix, infl, args) 570 | 571 | 572 | def add_manual_template(input_template, manual_template=None): 573 | """Append user defined template for any word in the original text. 574 | 575 | Parameters 576 | ---------- 577 | input_template : str 578 | Input text 579 | manual_template : dict 580 | Doct to add with key=word in the text, valu=dataframe expression 581 | 582 | 583 | Returns 584 | ------- 585 | str 586 | Tornado template corresponding to the text and data. 587 | 588 | Example 589 | ------- 590 | input_template = "The iris dataset has 3 {{ df.columns[0] }} - {{ df["species"].iloc[0] }}, \ 591 | {{ df["species"].iloc[1] }} and {{ df["species"].iloc[-1] }}." 592 | manual_template = {"3" : "{{ "+ len(df["species"].unique()) + " }}" } 593 | 594 | output_template = "The iris dataset has "{{ "+ len(df["species"].unique()) + \ 595 | " }}" {{ df.columns[0] }} - {{ df["species"].iloc[0] }}, \ 596 | {{ df["species"].iloc[1] }} and {{ df["species"].iloc[-1] }}." 597 | 598 | """ 599 | if manual_template is None: 600 | return input_template 601 | 602 | for key in manual_template: 603 | replace_with = "{{ " + manual_template[key][0]['tmpl'] + " }}" 604 | input_template = input_template.replace(key, replace_with) 605 | return input_template 606 | 607 | 608 | def render(df, template): 609 | return Template(template).generate(orgdf=df, U=utils, G=grammar) 610 | -------------------------------------------------------------------------------- /nlg/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | -------------------------------------------------------------------------------- /nlg/tests/data/actors.csv: -------------------------------------------------------------------------------- 1 | category,name,rating,votes 2 | Actors,Humphrey Bogart,0.57019677,109 3 | Actors,Cary Grant,0.438601513,142 4 | Actors,James Stewart,0.988373838,120 5 | Actors,Marlon Brando,0.102044811,108 6 | Actors,Fred Astaire,0.208876756,84 7 | Actresses,Katharine Hepburn,0.039187792,63 8 | Actresses,Bette Davis,0.282806963,14 9 | Actresses,Audrey Hepburn,0.120196561,94 10 | Actresses,Ingrid Bergman,0.296140198,52 11 | Actors,Spencer Tracy,0.466310773,192 12 | Actors,Charlie Chaplin,0.244425592,76 13 | -------------------------------------------------------------------------------- /nlg/tests/data/imdb_ratings.csv: -------------------------------------------------------------------------------- 1 | category,name,rating,votes 2 | Series,How I Met Your Mother,8.3,561820.0 3 | Movies,Inception,8.8,1915875.0 4 | Series,Dexter,8.6,614596.0 5 | Movies,The Shawshank Redemption,9.3,2184030.0 6 | Movies,The Godfather,9.2,1504601.0 7 | Series,Game of Thrones,9.3,1633121.0 8 | Series,Sherlock,9.1,743103.0 9 | Movies,The Dark Knight,9.0,2167100.0 10 | Movies,The Lord of the Rings: The Return of the King,8.9,1553705.0 11 | Movies,Fight Club,8.8,1744466.0 12 | Movies,The Matrix,8.7,1572571.0 13 | Series,The Big Bang Theory,8.1,677647.0 14 | Series,The Walking Dead,8.2,814294.0 15 | Series,Friends,8.9,732626.0 16 | Series,Breaking Bad,9.5,1303750.0 17 | Series,Stranger Things,8.8,701867.0 18 | Movies,Forrest Gump,8.8,1683919.0 19 | Movies,Pulp Fiction,8.9,1715698.0 20 | Movies,The Lord of the Rings: The Fellowship of the Ring,8.8,1565594.0 21 | Series,True Detective,9.0,464140.0 22 | -------------------------------------------------------------------------------- /nlg/tests/test_grammar.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | 5 | """ 6 | Tests for the nlg.grammar module. 
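
A rough sketch of the helpers exercised below; the outputs mirror the
assertions in TestGrammar:

>>> import nlg.grammar as G
>>> G.plural('language')
'languages'
>>> G.singular('bacteria')
'bacterium'
>>> G.concatenate_items('abc')
'a, b and c'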
7 | """ 8 | import os 9 | import unittest 10 | 11 | import pandas as pd 12 | 13 | import nlg.grammar as G # noqa: N812 14 | from nlg import utils 15 | from nlg.search import search_args, DFSearch 16 | 17 | nlp = utils.load_spacy_model() 18 | op = os.path 19 | 20 | 21 | class TestGrammar(unittest.TestCase): 22 | 23 | def test_is_plural(self): 24 | self.assertTrue(G.is_plural_noun("languages")) 25 | # self.assertTrue(G.is_plural("geese")) 26 | self.assertTrue(G.is_plural_noun("bacteria")) 27 | self.assertTrue(G.is_plural_noun("Office supplies")) 28 | 29 | def test_concatenate_items(self): 30 | self.assertEqual(G.concatenate_items("abc"), "a, b and c") 31 | self.assertEqual(G.concatenate_items([1, 2, 3], sep=""), "123") 32 | self.assertFalse(G.concatenate_items([])) 33 | 34 | def test_pluralize(self): 35 | self.assertEqual(G.plural("language"), "languages") 36 | self.assertEqual(G.plural("languages"), "languages") 37 | self.assertEqual(G.plural("bacterium"), "bacteria") 38 | self.assertEqual(G.plural("goose"), "geese") 39 | 40 | def test_singular(self): 41 | self.assertEqual(G.singular("languages"), "language") 42 | self.assertEqual(G.singular("language"), "language") 43 | self.assertEqual(G.singular("bacteria"), "bacterium") 44 | # self.assertEqual(G.singular("geese"), "goose") 45 | 46 | def test_pluralize_by(self): 47 | self.assertEqual(G.pluralize_by("language", [1, 2]), "languages") 48 | self.assertEqual(G.pluralize_by("languages", [1]), "language") 49 | self.assertEqual(G.pluralize_by("language", []), "language") 50 | self.assertEqual(G.pluralize_by("language", 1), "language") 51 | self.assertEqual(G.pluralize_by("language", 2), "languages") 52 | 53 | def test_number_inflection(self): 54 | text = nlp('Actors and actors.') 55 | x, y = text[0], text[-2] 56 | infl = G._number_inflection(x, y) 57 | self.assertEqual(infl, G.plural) 58 | 59 | text = nlp('Actors and dancers.') 60 | x, y = text[0], text[-2] 61 | infl = G._number_inflection(x, y) 62 | self.assertFalse(infl) 63 | 64 | def test_shape_inflections(self): 65 | text = nlp('Actors is plural of actors.') 66 | x, y = text[0], text[-2] 67 | infl = G._shape_inflection(x, y) 68 | self.assertEqual(infl, G.lower) 69 | 70 | def test_inflections(self): 71 | text = nlp('James Stewart is the actor with the highest rating.') 72 | df = pd.read_csv(op.join(op.dirname(__file__), "data", "actors.csv"), 73 | encoding='utf8') 74 | fh_args = {'_sort': ['-rating']} 75 | df = utils.gfilter(df, fh_args.copy()) 76 | args = utils.sanitize_fh_args(fh_args, df) 77 | dfs = DFSearch(df) 78 | dfix = dfs.search(text) 79 | dfix.update(search_args(dfs.ents, args)) 80 | dfix.clean() 81 | infl = G.find_inflections(dfix, fh_args, df) 82 | x, y = infl[text[4]] 83 | self.assertEqual(x, G.singular) 84 | self.assertEqual(y, G.lower) 85 | -------------------------------------------------------------------------------- /nlg/tests/test_narrative.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | 5 | """ 6 | Tests for the nlg.narrative module. 
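
A Nugget wraps a single templatized sentence; a Narrative is an ordered
collection of Nuggets. A minimal sketch of the round trip these tests
exercise (assuming the bundled actors.csv is loaded as df):

>>> nugget = templatize(nlp('James Stewart is the actor with the '
...                         'highest rating.'), {'_sort': ['-rating']}, df)
>>> nugget.render(df).lstrip().decode('utf8')
'James Stewart is the actor with the highest rating.'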
7 | """ 8 | 9 | import os 10 | import re 11 | import unittest 12 | 13 | import pandas as pd 14 | from spacy.tokens import Doc 15 | 16 | from nlg import templatize 17 | from nlg.narrative import Nugget, Narrative 18 | from nlg.utils import load_spacy_model 19 | 20 | op = os.path 21 | nlp = load_spacy_model() 22 | 23 | 24 | class TestNarrative(unittest.TestCase): 25 | 26 | @classmethod 27 | def setUpClass(cls): 28 | cls.df = pd.read_csv(op.join(op.dirname(__file__), "data", "actors.csv"), 29 | encoding='utf8') 30 | cls.text = nlp('James Stewart is the actor with the highest rating.') 31 | cls.nugget = templatize(cls.text, {'_sort': ['-rating']}, cls.df) 32 | 33 | def test_nugget_variables(self): 34 | varnames = set([c.text for c in self.nugget.variables]) 35 | self.assertSetEqual(varnames, {'James Stewart', 'actor', 'rating'}) 36 | 37 | def test_nugget_get_var(self): 38 | with self.assertRaises(KeyError): 39 | self.nugget.get_var('James Stewart') 40 | var = self.nugget.get_var('actor') 41 | self.assertEqual(str(var), '{{ G.singular(df["category"].iloc[0]).lower() }}') 42 | 43 | def test_nugget_render(self): 44 | df = self.df 45 | rendered = self.nugget.render(self.df) 46 | self.assertEqual(rendered.lstrip().decode('utf8'), self.text.text) 47 | xdf = df[df['category'] == 'Actors'].copy() 48 | xdf['rating'] = 1 - df.loc[xdf.index, 'rating'] 49 | rendered = self.nugget.render(xdf) 50 | self.assertEqual(rendered.lstrip().decode('utf8'), 51 | 'Marlon Brando is the actor with the highest rating.') 52 | 53 | def test_set_expr(self): 54 | var = self.nugget.get_var('actor') 55 | var.set_expr('df["category"].iloc[0]') 56 | self.assertEqual(str(var), '{{ G.singular(df["category"].iloc[0]).lower() }}') 57 | xdf = self.df[self.df['category'] == 'Actresses'] 58 | rendered = self.nugget.render(xdf) 59 | self.assertEqual(rendered.lstrip().decode('utf8'), 60 | 'Ingrid Bergman is the actress with the highest rating.') 61 | 62 | def test_add_var(self): 63 | var = self.nugget.get_var('actor') 64 | var_token, var_exp = self.text[-2], 'fh_args["_sort"][0]' 65 | for k in self.nugget.tokenmap: 66 | if k.text == 'rating': 67 | break 68 | del self.nugget.tokenmap[k] 69 | var.set_expr('df["category"].iloc[0]') 70 | self.nugget.add_var(var_token, expr=var_exp) 71 | 72 | # sort by votes 73 | self.nugget.fh_args = {'_sort': ['-votes']} 74 | rendered = self.nugget.render(self.df) 75 | self.assertEqual(rendered.lstrip().decode('utf8'), 76 | 'Spencer Tracy is the actor with the highest votes.') 77 | xdf = self.df[self.df['category'] == 'Actresses'] 78 | rendered = self.nugget.render(xdf) 79 | self.assertEqual(rendered.lstrip().decode('utf8'), 80 | 'Audrey Hepburn is the actress with the highest votes.') 81 | 82 | # Set the ratings back 83 | self.nugget.fh_args = {'_sort': ['-rating']} 84 | rendered = self.nugget.render(self.df) 85 | self.assertEqual(rendered.lstrip().decode('utf8'), 86 | 'James Stewart is the actor with the highest rating.') 87 | xdf = self.df[self.df['category'] == 'Actresses'] 88 | rendered = self.nugget.render(xdf) 89 | self.assertEqual(rendered.lstrip().decode('utf8'), 90 | 'Ingrid Bergman is the actress with the highest rating.') 91 | 92 | def test_serialize(self): 93 | pl = self.nugget.to_dict() 94 | self.assertEqual(pl['text'], self.text.text) 95 | self.assertDictEqual(pl['fh_args'], {'_sort': ['-rating']}) 96 | tokenmap = pl['tokenmap'] 97 | ideal = [ 98 | { 99 | 'text': 'rating', 'index': 8, 'idx': 44, 100 | 'sources': [ 101 | { 102 | 'tmpl': 'fh_args["_sort"][0]', 'type': 'user', 103 | 'enabled': 
True 104 | } 105 | ], 106 | 'varname': '', 'inflections': [] 107 | }, 108 | { 109 | 'index': (0, 2), 'idx': 0, 'text': 'James Stewart', 110 | 'sources': [ 111 | { 112 | 'location': 'cell', 'tmpl': 'df["name"].iloc[0]', 'type': 'ne', 113 | 'enabled': True 114 | } 115 | ], 116 | 'varname': '', 'inflections': [] 117 | }, 118 | { 119 | 'index': 4, 'idx': 21, 'text': 'actor', 120 | 'sources': [ 121 | { 122 | 'location': 'cell', 'tmpl': 'df["category"].iloc[0]', 'type': 'token', 123 | 'enabled': True 124 | } 125 | ], 126 | 'varname': '', 127 | 'inflections': [ 128 | {'source': 'G', 'fe_name': 'Singularize', 'func_name': 'singular'}, 129 | {'source': 'str', 'fe_name': 'Lowercase', 'func_name': 'lower'} 130 | ] 131 | } 132 | ] 133 | tokenmap = sorted(tokenmap, key=lambda x: x['text']) 134 | ideal = sorted(ideal, key=lambda x: x['text']) 135 | self.assertListEqual(ideal, tokenmap) 136 | 137 | def test_deserialize(self): 138 | pl = self.nugget.to_dict() 139 | nugget = Nugget.from_json(pl) 140 | actual = nugget.render(self.df).lstrip().decode('utf8') 141 | self.assertEqual(actual, self.text.text) 142 | 143 | def test_doc_serialize(self): 144 | nugget = templatize(nlp('Humphrey Bogart'), {}, self.df) 145 | pl = nugget.to_dict() 146 | self.assertEqual(len(pl['tokenmap']), 1) 147 | var = nugget.get_var(0) 148 | self.assertTrue(isinstance(var._token, Doc)) 149 | self.assertEqual(var._token.text, 'Humphrey Bogart') 150 | var_serialized = pl['tokenmap'][0] 151 | self.assertEqual(var_serialized['text'], 'Humphrey Bogart') 152 | self.assertEqual(var_serialized['idx'], 0) 153 | self.assertEqual(len(var_serialized['sources']), 1) 154 | source = var_serialized['sources'][0] 155 | self.assertEqual(source['tmpl'], 'df["name"].iloc[0]') 156 | 157 | def test_narrative_html(self): 158 | text = nlp('Katharine Hepburn is the actress with the least rating.') 159 | fh_args = {'_sort': ['-rating']} 160 | nugget = templatize(text, fh_args, self.df) 161 | narrative = Narrative([self.nugget, nugget]) 162 | 163 | # test default render 164 | actual = narrative.to_html(df=self.df) 165 | actual = re.sub(r'\s+', ' ', actual) 166 | ideal = ' James Stewart is the actor ' \ 167 | + 'with the highest rating. Katharine Hepburn is ' \ 168 | + 'the actress with the least rating.' 169 | self.assertEqual(ideal, actual) 170 | 171 | # test other options 172 | actual = narrative.to_html(bold=False, df=self.df) 173 | actual = re.sub(r'\s+', ' ', actual) 174 | no_bold = ideal.replace('', '') 175 | no_bold = no_bold.replace('', '') 176 | self.assertEqual(actual, no_bold) 177 | 178 | actual = narrative.to_html(italic=True, df=self.df) 179 | actual = re.sub(r'\s+', ' ', actual) 180 | italic = ideal.replace('', '') 181 | italic = italic.replace('', '') 182 | self.assertEqual(actual, italic) 183 | 184 | actual = narrative.to_html(underline=True, df=self.df) 185 | actual = re.sub(r'\s+', ' ', actual) 186 | italic = ideal.replace('', '') 187 | italic = italic.replace('', '') 188 | self.assertEqual(actual, italic) 189 | 190 | def test_parastyle(self): 191 | text = nlp('Katharine Hepburn is the actress with the least rating.') 192 | fh_args = {'_sort': ['-rating']} 193 | nugget = templatize(text, fh_args, self.df) 194 | narrative = Narrative([self.nugget, nugget]) 195 | 196 | actual = narrative.to_html(style='list', df=self.df) 197 | actual = re.sub(r'\s+', ' ', actual) 198 | ideal = '
    • James Stewart is the actor ' \ 199 | + 'with the highest rating.' \ 200 | + '
    • Katharine Hepburn is ' \ 201 | + 'the actress with the least rating.
    ' 202 | self.assertEqual(actual, ideal) 203 | 204 | actual = narrative.to_html(bold=False, style='list', liststyle='markdown', df=self.df) 205 | actual = [re.sub(r'\s+', ' ', c) for c in actual.splitlines()] 206 | ideal = [ 207 | '* James Stewart is the actor with the highest rating.', 208 | '* Katharine Hepburn is the actress with the least rating.' 209 | ] 210 | self.assertListEqual(actual, ideal) 211 | 212 | def test_condition(self): 213 | try: 214 | self.nugget.condition = 'df["category"].nunique() == 2' 215 | actual = self.nugget.render(self.df) 216 | self.assertEqual(actual.lstrip().rstrip(), 217 | b'James Stewart is the actor with the highest rating.') 218 | xdf = self.df[self.df['category'] == 'Actors'] 219 | actual = self.nugget.render(xdf) 220 | self.assertRegexpMatches(actual.decode('utf8'), r'^\s*$') 221 | finally: 222 | self.nugget.condition = None 223 | -------------------------------------------------------------------------------- /nlg/tests/test_search.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | 5 | """ 6 | Tests of the nlg.search module 7 | """ 8 | 9 | import os.path as op 10 | import re 11 | import unittest 12 | 13 | import pandas as pd 14 | from spacy.tokens import Span 15 | from tornado.template import Template 16 | 17 | from nlg import search, utils 18 | 19 | nlp = utils.load_spacy_model() 20 | matcher = utils.make_np_matcher(nlp) 21 | 22 | 23 | class TestDFSearch(unittest.TestCase): 24 | 25 | @classmethod 26 | def setUpClass(cls): 27 | fpath = op.join(op.dirname(__file__), "data", "actors.csv") 28 | cls.df = pd.read_csv(fpath, encoding='utf-8') 29 | cls.dfs = search.DFSearch(cls.df) 30 | 31 | def test__search_1d_array_literal(self): 32 | text = nlp('The votes, name and rating of the artists.') 33 | res = search._search_1d_array(text, self.df.columns, literal=True) 34 | ideal = {text[1]: 3, text[3]: 1, text[5]: 2} 35 | self.assertDictEqual(res, ideal) 36 | 37 | def test__search_1d_array_lemmatize(self): 38 | text = nlp('The votes, names and ratings of the artists.') 39 | res = search._search_1d_array(text, self.df.columns) 40 | ideal = {text[1]: 3, text[3]: 1, text[5]: 2} 41 | self.assertDictEqual(res, ideal) 42 | 43 | def test__search_2d_array_literal(self): 44 | text = nlp( 45 | "James Stewart is the actor with the highest rating of 0.988373838 and 120 votes.") 46 | xdf = self.df.sort_values('rating', ascending=False) 47 | res = search._search_2d_array(text, xdf, literal=True) 48 | ideal = {text[-5]: (0, 2), text[-3]: (0, 3)} 49 | self.assertDictEqual(res, ideal) 50 | 51 | def test__search_2d_array_lemmatize(self): 52 | text = nlp( 53 | "James Stewart is the actor with the highest rating of 0.988373838 and 120 votes.") 54 | xdf = self.df.sort_values('rating', ascending=False) 55 | res = search._search_2d_array(text, xdf) 56 | ideal = {text[-5]: (0, 2), text[-3]: (0, 3), text[4]: (9, 0)} 57 | self.assertDictEqual(res, ideal) 58 | 59 | def test__search_array(self): 60 | sent = nlp("The votes, names and ratings of artists.") 61 | res = self.dfs._search_array(sent, self.df.columns, literal=True) 62 | self.assertDictEqual(res, {sent[1]: 3}) 63 | 64 | res = self.dfs._search_array(sent, self.df.columns) 65 | self.assertDictEqual(res, {sent[1]: 3, sent[3]: 1, sent[5]: 2}) 66 | 67 | def test_dfsearch_lemmatized(self): 68 | df = pd.DataFrame.from_dict( 69 | { 70 | "partner": ["Lata Mangeshkar", "Asha Bhosale", "Mohammad Rafi"], 71 | "song": [20, 5, 15], 
72 | } 73 | ) 74 | sent = nlp("Kishore Kumar sang the most songs with Lata Mangeshkar.") 75 | dfs = search.DFSearch(df) 76 | self.assertDictEqual( 77 | dfs.search(sent, lemmatize=True), 78 | { 79 | sent[5]: [{"location": "colname", "type": "token", "tmpl": "df.columns[1]"}], 80 | sent[-3:-1]: [ 81 | {'location': 'cell', 'tmpl': 'df["partner"].iloc[0]', 'type': 'ne'}], 82 | } 83 | ) 84 | 85 | def test_search_df(self): 86 | fpath = op.join(op.dirname(__file__), "data", "actors.csv") 87 | df = pd.read_csv(fpath, encoding='utf-8') 88 | df.sort_values("votes", ascending=False, inplace=True) 89 | df.reset_index(inplace=True, drop=True) 90 | dfs = search.DFSearch(df) 91 | sent = nlp("Spencer Tracy is the top voted actor.") 92 | self.assertDictEqual( 93 | dfs.search(sent), 94 | { 95 | sent[:2]: [ 96 | {'location': 'cell', 'tmpl': 'df["name"].iloc[0]', 'type': 'ne'} 97 | ], 98 | sent[-3]: [{'location': 'colname', 'tmpl': 'df.columns[-1]', 'type': 'token'}], 99 | sent[-2]: [ 100 | {'location': 'cell', 'tmpl': 'df["category"].iloc[-4]', 'type': 'token'}] 101 | } 102 | ) 103 | 104 | 105 | class TestSearch(unittest.TestCase): 106 | 107 | @classmethod 108 | def setUpClass(cls): 109 | fpath = op.join(op.dirname(__file__), "data", "actors.csv") 110 | cls.df = pd.read_csv(fpath, encoding='utf-8') 111 | fpath = op.join(op.dirname(__file__), "data", "imdb_ratings.csv") 112 | cls.imdb = pd.read_csv(fpath, encoding='utf-8') 113 | 114 | def test_dfsearches(self): 115 | x = search.DFSearchResults() 116 | x['hello'] = 'world' 117 | x['hello'] = 'world' 118 | self.assertDictEqual(x, {'hello': ['world']}) 119 | x = search.DFSearchResults() 120 | x['hello'] = 'world' 121 | x['hello'] = 'underworld' 122 | self.assertDictEqual(x, {'hello': ['world', 'underworld']}) 123 | 124 | def test_search_args(self): 125 | args = utils.sanitize_fh_args({"_sort": ["-votes"]}, self.df) 126 | doc = nlp("James Stewart is the top voted actor.") 127 | ents = utils.ner(doc, matcher) 128 | self.assertDictEqual( 129 | search.search_args(ents, args), 130 | { 131 | doc[-3]: { 132 | "tmpl": "fh_args['_sort'][0]", 133 | "type": "token", 134 | "location": "fh_args" 135 | } 136 | } 137 | ) 138 | 139 | def test_search_args_literal(self): 140 | args = utils.sanitize_fh_args({"_sort": ["-rating"]}, self.df) 141 | doc = nlp("James Stewart has the highest rating.") 142 | ents = utils.ner(doc, matcher) 143 | self.assertDictEqual(search.search_args(ents, args, lemmatized=False), 144 | {doc[-2]: { 145 | "tmpl": "fh_args['_sort'][0]", 146 | "location": "fh_args", 147 | "type": "token"}}) 148 | 149 | def test_templatize(self): 150 | df = self.df.sort_values("votes", ascending=False) 151 | df.reset_index(inplace=True, drop=True) 152 | 153 | doc = nlp(""" 154 | Spencer Tracy is the top votes actor, followed by Cary Grant. 155 | The least votes actress is Bette Davis, trailing at only 14 votes, followed by 156 | Ingrid Bergman at a rating of 0.296140198. 157 | """) 158 | ideal = """ 159 | {{ df['name'].iloc[0] }} is the top {{ fh_args['_sort'][0] }} 160 | {{ df['category'].iloc[-4] }}, followed by {{ df['name'].iloc[1] }}. 161 | The least {{ fh_args['_sort'][0] }} {{ df['category'].iloc[-1] }} is 162 | {{ df['name'].iloc[-1] }}, trailing at only {{ df['votes'].iloc[-1] }} 163 | {{ df.columns[-1] }}, followed by {{ df['name'].iloc[-2] }} at a {{ df.columns[2] }} 164 | of {{ df['rating'].iloc[-2] }}. 
165 | """ 166 | args = {"_sort": ["-votes"]} 167 | tokenmap, text, inflections = search._search(doc, args, df, copy=True) 168 | actual = text.text 169 | for token, tmpls in tokenmap.items(): 170 | tmpl = [t for t in tmpls if t.get('enabled', False)][0] 171 | actual = actual.replace(token.text, 172 | '{{{{ {} }}}}'.format(tmpl['tmpl'])) 173 | cleaner = lambda x: re.sub(r"\s+", " ", x) # NOQA: E731 174 | ideal, actual = map(cleaner, (ideal, actual)) 175 | args = utils.sanitize_fh_args(args, df) 176 | ideal = Template(ideal).generate(df=df, fh_args=args) 177 | actual = Template(actual).generate(df=df, fh_args=args) 178 | self.assertEqual(ideal, actual) 179 | self.assertDictEqual( 180 | inflections, 181 | { 182 | doc[7]: [{'fe_name': 'Singularize', 'source': 'G', 'func_name': 'singular'}, 183 | {'fe_name': 'Lowercase', 'source': 'str', 'func_name': 'lower'}], 184 | doc[18]: [ # noqa: E912 185 | {'fe_name': 'Singularize', 'source': 'G', 'func_name': 'singular'}, 186 | {'fe_name': 'Lowercase', 'source': 'str', 'func_name': 'lower'}] 187 | } 188 | # Don't detect inflections until they can be processed without intervention 189 | # 'voted': [{'source': 'G', 'fe_name': 'Lemmatize', 'func_name': 'lemmatize'}]} 190 | ) 191 | 192 | def test_search_sort(self): 193 | results = [ 194 | {'tmpl': 'df.loc[0, "name"]', 'type': 'ne', 'location': 'cell'}, 195 | {'tmpl': 'df.columns[0]', 'type': 'token', 'location': 'colname'}, 196 | {'tmpl': 'args["_sort"][0]', 'type': 'token', 'location': 'fh_args'} 197 | ] 198 | _sorted = search._sort_search_results(results) 199 | enabled = [c for c in _sorted if c.get('enabled', False)] 200 | self.assertListEqual(enabled, results[:1]) 201 | 202 | results = [ 203 | {'tmpl': 'df.columns[0]', 'type': 'token', 'location': 'colname'}, 204 | {'tmpl': 'args["_sort"][0]', 'type': 'token', 'location': 'fh_args'}, 205 | {'tmpl': 'df["foo"].iloc[0]', 'type': 'token', 'location': 'cell'} 206 | ] 207 | _sorted = search._sort_search_results(results) 208 | enabled = [c for c in _sorted if c.get('enabled', False)] 209 | self.assertListEqual(enabled, results[1:2]) 210 | 211 | results = [ 212 | {'tmpl': 'df.columns[0]', 'type': 'token', 'location': 'colname'}, 213 | {'tmpl': 'args["_sort"][0]', 'type': 'token', 'location': 'cell'}, 214 | {'tmpl': 'df["foo"].iloc[0]', 'type': 'quant', 'location': 'cell'} 215 | ] 216 | _sorted = search._sort_search_results(results) 217 | enabled = [c for c in _sorted if c.get('enabled', False)] 218 | self.assertListEqual(enabled, results[:1]) 219 | 220 | results = [ 221 | {'tmpl': 'args["_sort"][0]', 'type': 'token', 'location': 'cell'}, 222 | {'tmpl': 'df["foo"].iloc[0]', 'type': 'quant', 'location': 'cell'} 223 | ] 224 | _sorted = search._sort_search_results(results) 225 | enabled = [c for c in _sorted if c.get('enabled', False)] 226 | self.assertListEqual(enabled, results[1:]) 227 | 228 | def test_single_entity_search(self): 229 | text = nlp("Humphrey Bogart") 230 | nugget = search.templatize(text, {}, self.df) 231 | self.assertEqual(len(nugget.tokenmap), 1) 232 | for token, variable in nugget.tokenmap.items(): 233 | break 234 | self.assertEqual(token.text, text.ents[0].text) 235 | self.assertEqual(variable.template, '{{ df["name"].iloc[0] }}') 236 | 237 | def test_literal_search(self): 238 | texts = ['How I Met Your Mother', 'Sherlock', 'Dexter', 'Breaking Bad'] 239 | for t in texts: 240 | doc = nlp(t) 241 | nugget = search.templatize(doc, {}, self.imdb) 242 | self.assertEqual(len(nugget.tokenmap), 1) 243 | for token, variable in nugget.tokenmap.items(): 
244 | self.assertEqual(token.text, t) 245 | self.assertRegex(nugget.template, r'{{ df\["name"\].iloc\[-*\d+\] }}') 246 | 247 | def test_search_short_strings(self): 248 | # Check strings that are shorter than the max length of the df, 249 | # but still not a literal match 250 | nugget = search.templatize(nlp('Dexter is a good show'), {}, self.imdb) 251 | self.assertEqual(len(nugget.tokenmap), 1) 252 | _, variable = nugget.tokenmap.popitem() 253 | self.assertRegex(variable.enabled_source['tmpl'], r'df\["name"\].iloc\[-*\d+\]') 254 | 255 | def test_token_span_overlap(self): 256 | df = pd.DataFrame([('Technology', 1, 3), ('Furniture', 2, 2), ('Office Supplies', 3, 1)]) 257 | df.columns = ['Category', 'Number', 'Sales'] 258 | text = nlp('Technology has the highest sales, followed by furniture and office supplies.') 259 | dfs = search.DFSearch(df) 260 | results = dfs.search(text) 261 | results.clean() 262 | self.assertEqual(len(results), 3) 263 | self.assertIn('Technology', [c.text for c in results]) 264 | for k in results: 265 | if k.text == 'Technology': 266 | self.assertTrue(isinstance(k, Span)) 267 | 268 | 269 | if __name__ == "__main__": 270 | unittest.main() 271 | -------------------------------------------------------------------------------- /nlg/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | 5 | """Tests for nlg.utils""" 6 | 7 | import os 8 | import unittest 9 | 10 | import pandas as pd 11 | 12 | from nlg import utils 13 | 14 | 15 | nlp = utils.load_spacy_model() 16 | matcher = utils.make_np_matcher(nlp) 17 | op = os.path 18 | 19 | 20 | class TestUtils(unittest.TestCase): 21 | 22 | def test_join_words(self): 23 | sent = 'The quick brown fox jumps over the lazy dog.' 24 | self.assertEqual(utils.join_words(sent), sent.rstrip('.')) 25 | self.assertEqual(utils.join_words(sent, ''), sent.rstrip('.').replace(' ', '')) 26 | self.assertEqual(utils.join_words('-Office supplies'), 'Office supplies') 27 | 28 | def test_sanitize_args(self): 29 | args = {'_by': ['category'], '_c': ['votes|avg'], '_sort': ['-votes|avg']} 30 | df = pd.read_csv(op.join(op.dirname(__file__), 'data', 'actors.csv'), encoding='utf8') 31 | self.assertDictEqual( 32 | utils.sanitize_fh_args(args, df), 33 | { 34 | '_by': ['category'], 35 | '_c': ['votes'], 36 | '_sort': ['votes|avg'] 37 | } 38 | ) 39 | 40 | @unittest.skip('NER is unstable.') 41 | def test_ner(self): 42 | sent = nlp( 43 | """ 44 | US President Donald Trump is an entrepreneur and 45 | used to run his own reality show named 'The Apprentice'.""" 46 | ) 47 | ents = utils.ner(sent, matcher) 48 | self.assertSetEqual( 49 | set([c.text for c in utils.unoverlap(ents)]), 50 | { 51 | "Donald Trump", 52 | "Apprentice", 53 | "US President", 54 | "President Donald", 55 | "entrepreneur", 56 | "reality show" 57 | }, 58 | ) 59 | 60 | def test_sanitize_indices(self): 61 | self.assertEqual(utils.sanitize_indices((3, 3), 0), 0) 62 | self.assertEqual(utils.sanitize_indices((3, 3), 1), 1) 63 | self.assertEqual(utils.sanitize_indices((3, 3), 2), -1) 64 | self.assertEqual(utils.sanitize_indices((3, 3), 0, 1), 0) 65 | self.assertEqual(utils.sanitize_indices((3, 3), 1, 1), 1) 66 | self.assertEqual(utils.sanitize_indices((3, 3), 2, 1), -1) 67 | 68 | @unittest.skip('WIP') 69 | def test_infer_quant(self): 70 | text = 'Of the three species, setosa has the highest average sepal width.' 
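# NOTE: spelled-out numbers like 'three' have an alphabetic .shape_ ('xxxx'),
# which QUANT_PATTERN in nlg.utils does not match - hence the 'WIP' skip above.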
71 | doc = nlp(text) 72 | self.assertEqual(utils.infer_quant(doc[2]), 3) 73 | 74 | text = 'Of the 3 species, setosa has the highest average sepal width.' 75 | doc = nlp(text) 76 | self.assertEqual(utils.infer_quant(doc[2]), 3) 77 | 78 | text = 'The value of pi is 3.14.' 79 | doc = nlp(text) 80 | self.assertEqual(utils.infer_quant(doc[-2]), 3.14) # noqa: E912 81 | 82 | 83 | if __name__ == "__main__": 84 | unittest.main() 85 | -------------------------------------------------------------------------------- /nlg/tests/test_webapp.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | 4 | import pandas as pd 5 | 6 | from nlg import templatize 7 | from nlg.utils import load_spacy_model 8 | from nlg import webapp as app 9 | 10 | 11 | nlp = load_spacy_model() 12 | op = os.path 13 | 14 | 15 | class TestWebApp(TestCase): 16 | 17 | @classmethod 18 | def setUpClass(cls): 19 | cls.df = pd.read_csv(op.join(op.dirname(__file__), "data", "actors.csv"), 20 | encoding='utf8') 21 | fh_args = {'_sort': ['-rating']} 22 | cls.text = nlp('James Stewart is the actor with the highest rating.') 23 | cls.nugget = templatize(cls.text, fh_args, cls.df) 24 | 25 | def test_preview_html(self): 26 | html = '{}' 27 | ideal = html.format("James Stewart") + " is the " 28 | ideal += html.format('actor') + " with the highest " + html.format('rating') + '.' 29 | template = self.nugget.to_dict() 30 | self.assertEqual(app.get_preview_html(template, True), ideal) 31 | 32 | text = nlp("James Stewart, Humphrey Bogart, Marlon Brando and Ingrid Bergman are actors.") 33 | names = ['James Stewart', 'Humphrey Bogart', 'Marlon Brando', 'Ingrid Bergman'] 34 | ideal = ", ".join([html.format(name) for name in names[:-1]]) 35 | ideal += " and " + html.format(names[-1]) + " are " + html.format('actors') + "." 36 | nugget = templatize(text, {}, self.df) 37 | template = nugget.to_dict() 38 | actual = app.get_preview_html(template, True) 39 | self.assertEqual(actual, ideal) 40 | 41 | def test_preview_html_noninteractive(self): 42 | html = '{}' 43 | ideal = html.format("James Stewart") + " is the " 44 | ideal += html.format('actor') + " with the highest " + html.format('rating') + "." 45 | template = self.nugget.to_dict() 46 | self.assertEqual(app.get_preview_html(template), ideal) 47 | 48 | text = nlp("James Stewart, Humphrey Bogart, Marlon Brando and Ingrid Bergman are actors.") 49 | names = ['James Stewart', 'Humphrey Bogart', 'Marlon Brando', 'Ingrid Bergman'] 50 | ideal = ", ".join([html.format(name) for name in names[:-1]]) 51 | ideal += " and " + html.format(names[-1]) + " are " + html.format('actors') + "." 52 | nugget = templatize(text, {}, self.df) 53 | template = nugget.to_dict() 54 | actual = app.get_preview_html(template) 55 | self.assertEqual(actual, ideal) 56 | -------------------------------------------------------------------------------- /nlg/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim:fenc=utf-8 3 | 4 | """ 5 | Miscellaneous utilities. 
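
For example, sanitize_indices rewrites positions in the latter half of an
axis as negative indices, so templates built on a sample dataframe keep
pointing at "the last row" when the data changes; a sketch mirroring
tests/test_utils.py:

>>> sanitize_indices((3, 3), 0)
0
>>> sanitize_indices((3, 3), 2)
-1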
6 | """ 7 | import os.path as op 8 | import re 9 | 10 | import pandas as pd 11 | from spacy.tokens import Token, Doc, Span 12 | from tornado.template import Template 13 | 14 | from gramex.data import filter as gfilter # NOQA: F401 15 | from gramex.data import ( 16 | _filter_groupby_columns, _filter_select_columns, _filter_sort_columns, _filter_col, 17 | _agg_sep 18 | ) 19 | 20 | NP_RULES = { 21 | 'NP1': [{'POS': 'PROPN', 'OP': '+'}], 22 | 'NP2': [{'POS': 'NOUN', 'OP': '+'}], 23 | 'NP3': [{'POS': 'ADV', 'OP': '+'}, {'POS': 'VERB', 'OP': '+'}], 24 | 'NP4': [{'POS': 'ADJ', 'OP': '+'}, {'POS': 'VERB', 'OP': '+'}], 25 | 'QUANT': [{'POS': 'NUM', 'OP': '+'}] 26 | } 27 | QUANT_PATTERN = re.compile(r'(^\.d+|^d+\.?(d?)+)') 28 | _spacy = { 29 | 'model': False, 30 | 'lemmatizer': False, 31 | 'matcher': False 32 | } 33 | 34 | 35 | def _locate_app_config(): 36 | return op.join(op.dirname(__file__), 'app', 'gramex.yaml') 37 | 38 | 39 | def load_spacy_model(): 40 | """Load the spacy model when required.""" 41 | if not _spacy['model']: 42 | from spacy import load 43 | nlp = load('en_core_web_sm') 44 | _spacy['model'] = nlp 45 | else: 46 | nlp = _spacy['model'] 47 | return nlp 48 | 49 | 50 | def get_lemmatizer(): 51 | if not _spacy['lemmatizer']: 52 | from spacy.lang.en import LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES 53 | from spacy.lemmatizer import Lemmatizer 54 | lemmatizer = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES) 55 | _spacy['lemmatizer'] = lemmatizer 56 | else: 57 | lemmatizer = _spacy['lemmatizer'] 58 | return lemmatizer 59 | 60 | 61 | def make_np_matcher(nlp, rules=NP_RULES): 62 | """Make a rule based noun phrase matcher. 63 | 64 | Parameters 65 | ---------- 66 | nlp : `spacy.lang` 67 | The spacy model to use. 68 | rules : dict, optional 69 | Mapping of rule IDS to spacy attribute patterns, such that each mapping 70 | defines a noun phrase structure. 71 | 72 | Returns 73 | ------- 74 | `spacy.matcher.Matcher` 75 | """ 76 | if not _spacy['matcher']: 77 | from spacy.matcher import Matcher 78 | matcher = Matcher(nlp.vocab) 79 | for k, v in rules.items(): 80 | matcher.add(k, None, v) 81 | _spacy['matcher'] = matcher 82 | else: 83 | matcher = _spacy['matcher'] 84 | return matcher 85 | 86 | 87 | def render_search_result(text, results, **kwargs): 88 | for token, tokenlist in results.items(): 89 | tmpl = [t for t in tokenlist if t.get('enabled', False)][0] 90 | text = text.replace(token, '{{{{ {} }}}}'.format(tmpl['tmpl'])) 91 | return Template(text).generate(**kwargs).decode('utf-8') 92 | 93 | 94 | def join_words(x, sep=' '): 95 | return sep.join(re.findall(r'\w+', x, re.IGNORECASE)) 96 | 97 | 98 | class set_nlg_gramopt(object): # noqa: class to be used as a decorator 99 | """Decorator for adding callables to grammar options of the webapp. 
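
Example
-------
A minimal sketch: the keyword arguments become attributes on the decorated
callable, which the webapp reads to build its grammar/inflection options
(the attribute values below are illustrative):

>>> @set_nlg_gramopt(source='G', fe_name='Pluralize')
... def plural(word):
...     pass
>>> plural.gramopt, plural.source, plural.fe_name
(True, 'G', 'Pluralize')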
100 | """ 101 | def __init__(self, **kwargs): 102 | self.kwargs = kwargs 103 | 104 | def __call__(self, func): 105 | func.gramopt = True 106 | for k, v in self.kwargs.items(): 107 | if not getattr(func, k, False): 108 | setattr(func, k, v) 109 | return func 110 | 111 | 112 | def is_overlap(x, y): 113 | """Whether the token x is contained within any span in the sequence y.""" 114 | if len(y) == 0: 115 | return False 116 | if isinstance(x, Token): 117 | if x.pos_ == "NUM": 118 | return False 119 | elif 'NUM' in [c.pos_ for c in x]: 120 | return False 121 | if len(y) > 1: 122 | if isinstance(x, Token): 123 | return any([x.text in yy.text for yy in y]) 124 | y = y.pop() 125 | if isinstance(x, (Token, Span)) and isinstance(y, Doc): 126 | return x.doc == y 127 | return False 128 | 129 | 130 | def unoverlap(tokens): 131 | """From a set of tokens, remove all tokens that are contained within 132 | others.""" 133 | textmap = {c: c for c in tokens} 134 | newtokens = [] 135 | for token in tokens: 136 | if not is_overlap(textmap[token], set(tokens) - {token}): 137 | newtokens.append(token) 138 | return [textmap[t] for t in newtokens] 139 | 140 | 141 | def ner(doc, matcher, match_ids=False, remove_overlap=True): 142 | """Find all NEs and other nouns in a spacy doc. 143 | 144 | Parameters 145 | ---------- 146 | doc: spacy.tokens.doc.Doc 147 | The document in which to search for entities. 148 | matcher: spacy.matcher.Matcher 149 | The rule based matcher to use for finding noun phrases. 150 | match_ids: list, optional 151 | IDs from the spacy matcher to filter from the matches. 152 | remove_overlap: bool, optional 153 | Whether to remove overlapping tokens from the result. 154 | 155 | Returns 156 | ------- 157 | list 158 | List of spacy.token.span.Span objects. 159 | """ 160 | entities = set() 161 | for span in doc.ents: 162 | newtokens = [c for c in span if not c.is_space] 163 | if newtokens: 164 | newspan = doc[newtokens[0].i: (newtokens[-1].i + 1)] 165 | entities.add(newspan) 166 | if not match_ids: 167 | entities.update([doc[start:end] for _, start, end in matcher(doc)]) 168 | else: 169 | for m_id, start, end in matcher(doc): 170 | if matcher.vocab.strings[m_id] in match_ids: 171 | entities.add(doc[start:end]) 172 | if remove_overlap: 173 | entities = unoverlap(entities) 174 | return entities 175 | 176 | 177 | def sanitize_indices(shape, i, axis=0): 178 | n = shape[axis] 179 | if i <= n // 2: 180 | return i 181 | return -(n - i) 182 | 183 | 184 | def sanitize_text(text, d_round=2): 185 | """All text cleaning and standardization logic goes here.""" 186 | nums = re.findall(r'\d+\.\d+', text) 187 | for num in nums: 188 | text = re.sub(num, str(round(float(num), d_round)), text) 189 | return text 190 | 191 | 192 | def sanitize_df(df, d_round=2, **options): 193 | """All dataframe cleaning and standardizing logic goes here.""" 194 | for c in df.columns[df.dtypes == float]: 195 | df[c] = df[c].round(d_round) 196 | return df 197 | 198 | 199 | def sanitize_fh_args(args, df): 200 | columns = df.columns 201 | meta = { 202 | 'filters': [], # Applied filters as [(col, op, val), ...] 203 | 'ignored': [], # Ignored filters as [(col, vals), ...] 204 | 'sort': [], # Sorted columns as [(col, asc), ...] 205 | 'offset': 0, # Offset as integer 206 | 'limit': None, # Limit as integer - None if not applied 207 | 'by': [], # Group by columns as [col, ...] 
208 | }
209 | res = {}
210 | if '_by' in args:
211 | res['_by'] = _filter_groupby_columns(args['_by'], columns, meta)
212 | col_list = args.get('_c', False)
213 | if not col_list:
214 | col_list = [col + _agg_sep + 'sum' for col in columns # noqa
215 | if pd.api.types.is_numeric_dtype(df[col])]
216 | res['_c'] = []
217 | for c in col_list:
218 | res['_c'].append(_filter_col(c, df.columns)[0])
219 | columns = col_list
220 | elif '_c' in args:
221 | selected, _ = _filter_select_columns(args['_c'], columns, meta)
222 | res['_c'] = [c[0] for c in selected]
223 | if '_sort' in args:
224 | sort = _filter_sort_columns(args, columns, meta)
225 | res['_sort'] = [c[0] for c in sort]
226 | return res
227 | 
228 | 
229 | def add_html_styling(template, style):
230 | """Add HTML styling spans to template elements.
231 | 
232 | Parameters
233 | ----------
234 | template : str
235 | A tornado template
236 | style : dict or bool
237 | If False, no styling is added.
238 | If True, a default bgcolor is added to template variables.
239 | If dict, expected to contain HTML span styling elements.
240 | 
241 | Returns
242 | -------
243 | str
244 | Modified template with each variable stylized.
245 | 
246 | Example
247 | -------
248 | >>> t = 'Hello, {{ name }}!'
249 | >>> add_html_styling(t, True)
250 | 'Hello, <span style="background-color:#c8f442">{{ name }}</span>!'
251 | >>> add_html_styling(t, False)
252 | 'Hello, {{ name }}!'
253 | >>> add_html_styling(t, {'background-color': '#ffffff', 'font-family': 'monospace'})
254 | 'Hello, <span style="background-color:#ffffff;font-family:monospace">{{ name }}</span>!'
255 | """
256 | 
257 | if not style:
258 | return template
259 | pattern = re.compile(r'\{\{[^\{\}]+\}\}')
260 | if isinstance(style, dict):
261 | # convert the style dict into a stylized HTML span
262 | spanstyle = ';'.join(['{}:{}'.format(k, v) for k, v in style.items()])
263 | else:
264 | spanstyle = 'background-color:#c8f442'
265 | for m in re.finditer(pattern, template):
266 | token = m.group()
267 | repl = '<span style="{ss}">{token}</span>'.format(
268 | ss=spanstyle, token=token)
269 | template = re.sub(re.escape(token), repl, template, 1)
270 | return '
<p>
    {template}
</p>
    '.format(template=template) 271 | 272 | 273 | def infer_quant(token): 274 | """Infer the quantitative value from a token which has POS == 'NUM' or is like_num. 275 | 276 | Parameters 277 | ---------- 278 | token : `spacy.tokens.Token` 279 | A spacy token representing a number / scalar. This can be anything with a POS attribute of 280 | 'NUM' or is like_nnum 281 | 282 | Returns 283 | ------- 284 | float or int 285 | 286 | Example 287 | ------- 288 | >>> doc = nlp('Aryabhatta invented the zero.') 289 | >>> infer_quant(doc[-2]) 290 | 0 291 | """ 292 | if re.fullmatch(QUANT_PATTERN, token.shape_): 293 | if "." in token.text: 294 | return float(token.text) 295 | return int(token.text) 296 | -------------------------------------------------------------------------------- /nlg/webapp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim:fenc=utf-8 3 | 4 | """ 5 | Module for gramex exposure. This shouldn't be imported anywhere, only for use 6 | with gramex. 7 | """ 8 | import glob 9 | import json 10 | import os 11 | import os.path as op 12 | 13 | from gramex.config import variables 14 | from gramex.config import app_log # noqa: F401 15 | import pandas as pd 16 | from tornado.template import Loader 17 | 18 | from nlg import utils, templatize, grammar_options 19 | from nlg.narrative import Narrative 20 | 21 | DATAFILE_EXTS = {'.csv', '.xls', '.xlsx', '.tsv'} 22 | NARRATIVE_CACHE = {} 23 | 24 | nlg_path = op.join(variables['GRAMEXDATA'], 'nlg') 25 | nlp = utils.load_spacy_model() 26 | tmpl_loader = Loader(op.join(op.dirname(__file__), "app", "templates"), autoescape=None) 27 | 28 | if not op.isdir(nlg_path): 29 | os.mkdir(nlg_path) 30 | 31 | 32 | def get_config_modal(handler): 33 | return tmpl_loader.load("init-config-modal.tmpl").generate(handler=handler) 34 | 35 | 36 | def get_narrative_cache(handler): 37 | narrative = NARRATIVE_CACHE.get(handler.current_user.id, Narrative()) 38 | return json.dumps(narrative.to_dict()) 39 | 40 | 41 | download_narrative = get_narrative_cache 42 | load_narrative = get_narrative_cache 43 | 44 | 45 | def new_variable_tmpl(handler): 46 | nugget_id = int(handler.path_args[0]) 47 | variable_ix = handler.path_args[1] 48 | nugget = NARRATIVE_CACHE[handler.current_user.id][nugget_id] 49 | start, end = map(int, variable_ix.split(',')) 50 | span = nugget.doc.text[start:end] 51 | nlg_base = variables['NLG_BASE'].rstrip('/') 52 | return tmpl_loader.load("new-variable.tmpl").generate( 53 | nugget_id=nugget_id, text=span, variable_ix=variable_ix, nlg_base=nlg_base) 54 | 55 | 56 | def add_new_variable(handler): 57 | nugget = NARRATIVE_CACHE[handler.current_user.id][int(handler.path_args[0])] 58 | start, end = map(int, handler.path_args[1].split(',')) 59 | nugget.add_var([start, end], expr=handler.args['expr'][0]) 60 | return nugget.template 61 | 62 | 63 | def get_preview_html(template, interactive=False): 64 | """get_preview_html 65 | 66 | Parameters 67 | ---------- 68 | template : {{_type_}} 69 | 70 | 71 | Returns 72 | ------- 73 | 74 | Example 75 | ------- 76 | """ 77 | text = template['text'] 78 | if interactive: 79 | html = '{}' 80 | else: 81 | html = '{}' 82 | l_offset = len(html.format('')) 83 | offset = 0 84 | tokenmap = sorted(template['tokenmap'], key=lambda x: x['idx']) 85 | for token in tokenmap: 86 | start = token['idx'] + offset 87 | end = start + len(token['text']) 88 | prefix = text[:start] 89 | suffix = text[end:] 90 | text = prefix + html.format(token['text']) + suffix 91 | offset += l_offset 
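# Each wrap grows the text by len(html.format('')) characters, so `offset`
# shifts every subsequent token's `idx` by l_offset per replacement.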
92 | return text 93 | 94 | 95 | def get_variable_settings_tmpl(handler): 96 | nugget_id, variable_ix = handler.path_args 97 | nugget = NARRATIVE_CACHE[handler.current_user.id][int(nugget_id)] 98 | if not variable_ix.isdigit(): 99 | start, stop = map(int, variable_ix.split(",")) 100 | variable = nugget.get_var((start, stop)).to_dict() 101 | else: 102 | variable_i = int(variable_ix) 103 | variable = nugget.get_var(variable_i).to_dict() 104 | tmpl = tmpl_loader.load("variable-settings.tmpl") 105 | return tmpl.generate( 106 | variable=variable, nugget_id=nugget_id, variable_id=variable_ix, 107 | grammar_options=grammar_options) 108 | 109 | 110 | def set_variable_settings_tmpl(handler): 111 | nugget_id, variable_ix = handler.path_args 112 | nugget = NARRATIVE_CACHE[handler.current_user.id][int(nugget_id)] 113 | if not variable_ix.isdigit(): 114 | variable_i = map(int, variable_ix.split(",")) 115 | else: 116 | variable_i = int(variable_ix) 117 | variable = nugget.get_var(variable_i) 118 | # handler.args will be something like 119 | # {'sourcetext': [''], 'sources': ['0'], 'expr': ['foo'], 'inflections': ['Singularize']} 120 | 121 | expr = handler.args['expr'][0] 122 | if expr: # Ignore the default value of the sources dropdown if expression is present 123 | variable.set_expr(expr) 124 | else: 125 | source = int(handler.args['sources'][0]) 126 | if variable.sources[source]['tmpl'] != variable.enabled_source: 127 | variable.enable_source(source) 128 | 129 | inflections = handler.args.get('inflections', False) 130 | if inflections: 131 | variable.inflections = [grammar_options[i] for i in inflections] 132 | else: 133 | variable.inflections = [] 134 | return nugget.template 135 | 136 | 137 | def get_nugget_settings_tmpl(handler): 138 | nugget = get_nugget(handler) 139 | nugget_id = int(handler.path_args[0]) 140 | nlg_base = variables['NLG_BASE'].rstrip('/') 141 | return tmpl_loader.load("template-settings.tmpl").generate( 142 | template=nugget, nugget_id=nugget_id, nlg_base=nlg_base) 143 | 144 | 145 | def add_condition(handler): 146 | nugget = NARRATIVE_CACHE[handler.current_user.id][int(handler.path_args[0])] 147 | nugget.condition = handler.args['condition'][0] 148 | 149 | 150 | def get_nugget(handler): 151 | nugget_id = int(handler.path_args[0]) 152 | if 'delete' in handler.args: 153 | del NARRATIVE_CACHE[handler.current_user.id][nugget_id] 154 | return NARRATIVE_CACHE[handler.current_user.id].to_dict() 155 | else: 156 | nugget = NARRATIVE_CACHE[handler.current_user.id][nugget_id] 157 | nugget = nugget.to_dict() 158 | nugget['previewHTML'] = get_preview_html(nugget, True) 159 | return nugget 160 | 161 | 162 | def clean_anonymous_files(): 163 | """Remove all files uploaded by anonymous users. 
164 | This may be used at startup when deploying the app.""" 165 | import shutil 166 | anon_dir = op.join(nlg_path, 'anonymous') 167 | if op.isdir(anon_dir): 168 | shutil.rmtree(anon_dir) 169 | 170 | 171 | def is_user_authenticated(handler): 172 | """Check if the current user is authenticated.""" 173 | current_user = getattr(handler, 'current_user', False) 174 | return bool(current_user) 175 | 176 | 177 | def get_user_dir(handler): 178 | if is_user_authenticated(handler): 179 | dirpath = op.join(nlg_path, handler.current_user.id) 180 | else: 181 | dirpath = op.join(nlg_path, 'anonymous') 182 | return dirpath 183 | 184 | 185 | def render_live_template(handler): 186 | """Given a narrative ID and df records, render the template.""" 187 | payload = json.loads(handler.request.body) 188 | df = pd.DataFrame.from_records(payload['data']) 189 | nrid = payload['nrid'] 190 | if not nrid.endswith('.json'): 191 | nrid += '.json' 192 | with open(op.join(get_user_dir(handler), nrid), 'r', encoding='utf8') as fin: 193 | narrative = json.load(fin) 194 | narrative = Narrative.from_json(narrative) 195 | return narrative.to_html(**narrative.html_style, df=df) 196 | 197 | 198 | def get_style_kwargs(handler_args): 199 | style_kwargs = { 200 | 'style': handler_args.pop('style', ['para'])[0], 201 | 'liststyle': handler_args.pop('liststyle', ['html'])[0], 202 | } 203 | style_kwargs.update({k: json.loads(v[0]) for k, v in handler_args.items()}) 204 | return style_kwargs 205 | 206 | 207 | def render_narrative(handler): 208 | orgdf = get_original_df(handler) 209 | narrative = NARRATIVE_CACHE.get(handler.current_user.id, False) 210 | if narrative: 211 | style_kwargs = get_style_kwargs(handler.args) 212 | pl = {'render': narrative.to_html(**style_kwargs, df=orgdf), 213 | 'style': narrative.html_style} 214 | else: 215 | pl = {'render': '', 'style': Narrative.default_style} 216 | return pl 217 | 218 | 219 | def get_original_df(handler): 220 | """Get the original dataframe which was uploaded to the webapp.""" 221 | data_dir = get_user_dir(handler) 222 | meta_path = op.join(data_dir, 'meta.cfg') 223 | if op.isfile(meta_path): 224 | with open(meta_path, 'r') as fout: # noqa: No encoding for json 225 | meta = json.load(fout) 226 | dataset_path = op.join(data_dir, meta['dsid']) 227 | return pd.read_csv(dataset_path, encoding='utf-8') 228 | 229 | 230 | def render_template(handler): 231 | """Render a set of templates against a dataframe and formhandler actions on it.""" 232 | orgdf = get_original_df(handler) 233 | nugget = NARRATIVE_CACHE[handler.current_user.id][int(handler.path_args[0])] 234 | return nugget.render(orgdf) 235 | 236 | 237 | def save_nugget(sid, nugget): 238 | narrative = NARRATIVE_CACHE.get(sid, Narrative()) 239 | narrative.append(nugget) 240 | if len(narrative) > 0: 241 | NARRATIVE_CACHE[sid] = narrative 242 | # outpath = op.join(nlg_path, sid + ".json") 243 | # with open(outpath, 'w', encoding='utf8') as fout: 244 | # json.dump([n.to_dict() for n in narrative], fout, indent=4) 245 | 246 | 247 | def process_text(handler): 248 | """Process English text in the context of a df and formhandler arguments 249 | to templatize it.""" 250 | payload = json.loads(handler.request.body.decode('utf8')) 251 | df = pd.DataFrame.from_records(payload['data']) 252 | args = payload.get('args', {}) or {} 253 | nugget = templatize(nlp(payload['text']), args.copy(), df) 254 | save_nugget(handler.current_user.id, nugget) 255 | nugget = nugget.to_dict() 256 | nugget['previewHTML'] = get_preview_html(nugget) 257 | return nugget 258 | 
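# A sketch of the JSON body `process_text` expects (field names taken from the
# function above; the route mapping to this handler lives in gramex.yaml):
#
#   {
#       "text": "James Stewart is the actor with the highest rating.",
#       "args": {"_sort": ["-rating"]},
#       "data": [{"category": "Actors", "name": "James Stewart", ...}]
#   }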
259 | 260 | def read_current_config(handler): 261 | """Read the current data and narrative IDs written to the session file.""" 262 | user_dir = get_user_dir(handler) 263 | meta_path = op.join(user_dir, 'meta.cfg') 264 | if not op.isdir(user_dir): 265 | os.mkdir(user_dir) 266 | if not op.isfile(meta_path): 267 | return {} 268 | with open(meta_path, 'r') as fout: # noqa: No encoding for json 269 | meta = json.load(fout) 270 | return meta 271 | 272 | 273 | def get_dataset_files(handler): 274 | """Get all filenames uploaded by the user. 275 | 276 | Parameters 277 | ---------- 278 | handler : tornado.RequestHandler 279 | 280 | Returns 281 | ------- 282 | list 283 | List of filenames. 284 | """ 285 | files = glob.glob('{}/*'.format(get_user_dir(handler))) 286 | return [f for f in files if op.splitext(f)[-1].lower() in DATAFILE_EXTS] 287 | 288 | 289 | def get_narrative_config_files(handler): 290 | """Get list of narrative config files generated by the user. 291 | 292 | Parameters 293 | ---------- 294 | handler : tornado.RequestHandler 295 | 296 | Returns 297 | ------- 298 | list 299 | List of narrative configurations. 300 | """ 301 | return glob.glob('{}/*.json'.format(get_user_dir(handler))) 302 | 303 | 304 | def init_form(handler): 305 | """Process input from the landing page and write the current session config.""" 306 | meta = {} 307 | data_dir = get_user_dir(handler) 308 | if not op.isdir(data_dir): 309 | os.makedirs(data_dir) 310 | 311 | # handle dataset 312 | data_file = handler.request.files.get('data-file', [{}])[0] 313 | if data_file: 314 | # TODO: Unix filenames may not be valid Windows filenames. 315 | outpath = op.join(data_dir, data_file['filename']) 316 | with open(outpath, 'wb') as fout: 317 | fout.write(data_file['body']) 318 | else: 319 | dataset = handler.args['dataset'][0] 320 | outpath = op.join(data_dir, dataset) 321 | # shutil.copy(outpath, fh_fpath) 322 | meta['dsid'] = op.basename(outpath) 323 | 324 | # handle config 325 | config_name = handler.get_argument('narrative', '') 326 | if config_name: 327 | outpath = op.join(data_dir, config_name) 328 | # shutil.copy(config_path, op.join(local_data_dir, 'config.json')) 329 | else: 330 | conf_file = handler.request.files.get('config-file', [{}])[0] 331 | if conf_file: 332 | outpath = op.join(data_dir, conf_file['filename']) 333 | with open(outpath, 'wb') as fout: 334 | fout.write(conf_file['body']) 335 | else: 336 | outpath = False 337 | if outpath: 338 | meta['nrid'] = op.basename(outpath) 339 | 340 | # write meta config 341 | with open(op.join(data_dir, 'meta.cfg'), 'w') as fout: # NOQA 342 | json.dump(meta, fout, indent=4) 343 | 344 | 345 | def get_init_config(handler): 346 | """Get the initial default configuration for the current user.""" 347 | user_dir = get_user_dir(handler) 348 | metapath = op.join(user_dir, 'meta.cfg') 349 | if op.isfile(metapath): 350 | with open(metapath, 'r') as fout: # NOQA: no encoding for JSON 351 | meta = json.load(fout) 352 | narrative_file = meta.get('nrid', '') 353 | narrative_name = op.splitext(narrative_file)[0] 354 | config_file = op.join(user_dir, narrative_file) 355 | if op.isfile(config_file): 356 | with open(config_file, 'r') as fout: # NOQA: no encoding for JSON 357 | meta['config'] = json.load(fout) 358 | global NARRATIVE_CACHE 359 | NARRATIVE_CACHE = {} 360 | NARRATIVE_CACHE[handler.current_user.id] = \ 361 | Narrative.from_json(meta['config']) 362 | app_log.debug('Initial config loaded from {}'.format(config_file)) 363 | return { 364 | 'style': 
NARRATIVE_CACHE[handler.current_user.id].html_style, 365 | 'nrid': narrative_name} 366 | return {} 367 | 368 | 369 | def save_narrative(handler): 370 | name = handler.path_args[0] 371 | if not name.endswith('.json'): 372 | name += '.json' 373 | outpath = op.join(get_user_dir(handler), name) 374 | with open(outpath, 'w', encoding='utf8') as fout: 375 | json.dump(NARRATIVE_CACHE[handler.current_user.id].to_dict(), 376 | fout, indent=4) 377 | 378 | 379 | def move_nuggets(handler): 380 | pop, drop = map(int, handler.path_args) 381 | narrative = NARRATIVE_CACHE[handler.current_user.id] 382 | popped = narrative.pop(pop) 383 | narrative.insert(drop, popped) 384 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This is intended for installation of the Gramex app. 2 | # Do NOT use for anything. 3 | git+https://github.com/gramener/gramex-nlg@dev#egg=nlg 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | universal = 1 3 | 4 | [pep8] 5 | ignore = E265,E402 6 | 7 | [flake8] 8 | exclude=build,dist,docs,.eggs,node_modules,.vscode 9 | max-line-length=99 10 | ; E911 allows use of str(). Required for pathlib.Path to string conversions 11 | ; N802 ignores "function name should be in lowercase". Required for 12 | ; tearDownModule(), extendMarkdown, etc where function name is pre-defined 13 | ignore=E911,N802 14 | 15 | [nosetests] 16 | verbosity=2 17 | nocapture=1 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | """ 6 | NLG Setup. 7 | """ 8 | 9 | import builtins 10 | from setuptools import setup, find_packages 11 | 12 | 13 | builtins.__NLG_SETUP__ = True 14 | 15 | # Setuptools config 16 | NAME = "nlg" 17 | DESCRIPTION = "Natural Language Generation framework for Python." 18 | with open('README.rst', encoding='utf-8') as f: 19 | LONG_DESCRIPTION = f.read() 20 | MAINTAINER = 'Jaidev Deshpande' 21 | MAINTAINER_EMAIL = 'jaidev.deshpande@gramener.com' 22 | URL = "https://github.com/gramener/gramex-nlg" 23 | DOWNLOAD_URL = 'https://pypi.org/project/nlg/#files' 24 | LICENSE = 'MIT' 25 | PROJECT_URLS = { 26 | 'Bug Tracker': 'https://github.com/gramener/gramex-nlg/issues', 27 | 'Documentation': 'https://learn.gramener.com/guide/nlg', 28 | 'Source Code': 'https://github.com/gramener/gramex-nlg' 29 | } 30 | 31 | # Requirements 32 | install_requires = [ 33 | 'gramex', 34 | 'humanize', 35 | 'inflect', 36 | 'spacy==2.1.8', 37 | ] 38 | 39 | # Setup 40 | import nlg # NOQA: E402 41 | setup( 42 | name=NAME, 43 | maintainer=MAINTAINER, 44 | maintainer_email=MAINTAINER_EMAIL, 45 | description=DESCRIPTION, 46 | license=LICENSE, 47 | url=URL, 48 | download_url=DOWNLOAD_URL, 49 | include_package_data=True, 50 | version=nlg.__version__, 51 | long_description=LONG_DESCRIPTION, 52 | packages=find_packages(), 53 | install_requires=install_requires 54 | ) 55 | --------------------------------------------------------------------------------