├── .github
└── workflows
│ └── unit_test.yml
├── .gitignore
├── README.md
├── docs
├── Makefile
├── _build
│ └── html
│ │ ├── _sources
│ │ ├── api.rst.txt
│ │ ├── development.rst.txt
│ │ ├── index.rst.txt
│ │ ├── intro.rst.txt
│ │ └── tutorial.rst.txt
│ │ ├── _static
│ │ ├── Watson_Tranformer_Design.jpg
│ │ ├── Watson_Tranformer_Design.svg
│ │ ├── alabaster.css
│ │ ├── basic.css
│ │ ├── css
│ │ │ ├── badge_only.css
│ │ │ ├── fonts
│ │ │ │ ├── Roboto-Slab-Bold.woff
│ │ │ │ ├── Roboto-Slab-Bold.woff2
│ │ │ │ ├── Roboto-Slab-Regular.woff
│ │ │ │ ├── Roboto-Slab-Regular.woff2
│ │ │ │ ├── fontawesome-webfont.eot
│ │ │ │ ├── fontawesome-webfont.svg
│ │ │ │ ├── fontawesome-webfont.ttf
│ │ │ │ ├── fontawesome-webfont.woff
│ │ │ │ ├── fontawesome-webfont.woff2
│ │ │ │ ├── lato-bold-italic.woff
│ │ │ │ ├── lato-bold-italic.woff2
│ │ │ │ ├── lato-bold.woff
│ │ │ │ ├── lato-bold.woff2
│ │ │ │ ├── lato-normal-italic.woff
│ │ │ │ ├── lato-normal-italic.woff2
│ │ │ │ ├── lato-normal.woff
│ │ │ │ └── lato-normal.woff2
│ │ │ └── theme.css
│ │ ├── custom.css
│ │ ├── doctools.js
│ │ ├── documentation_options.js
│ │ ├── file.png
│ │ ├── fonts
│ │ │ ├── FontAwesome.otf
│ │ │ ├── Lato
│ │ │ │ ├── lato-bold.eot
│ │ │ │ ├── lato-bold.ttf
│ │ │ │ ├── lato-bold.woff
│ │ │ │ ├── lato-bold.woff2
│ │ │ │ ├── lato-bolditalic.eot
│ │ │ │ ├── lato-bolditalic.ttf
│ │ │ │ ├── lato-bolditalic.woff
│ │ │ │ ├── lato-bolditalic.woff2
│ │ │ │ ├── lato-italic.eot
│ │ │ │ ├── lato-italic.ttf
│ │ │ │ ├── lato-italic.woff
│ │ │ │ ├── lato-italic.woff2
│ │ │ │ ├── lato-regular.eot
│ │ │ │ ├── lato-regular.ttf
│ │ │ │ ├── lato-regular.woff
│ │ │ │ └── lato-regular.woff2
│ │ │ ├── Roboto-Slab-Bold.woff
│ │ │ ├── Roboto-Slab-Bold.woff2
│ │ │ ├── Roboto-Slab-Light.woff
│ │ │ ├── Roboto-Slab-Light.woff2
│ │ │ ├── Roboto-Slab-Regular.woff
│ │ │ ├── Roboto-Slab-Regular.woff2
│ │ │ ├── Roboto-Slab-Thin.woff
│ │ │ ├── Roboto-Slab-Thin.woff2
│ │ │ ├── RobotoSlab
│ │ │ │ ├── roboto-slab-v7-bold.eot
│ │ │ │ ├── roboto-slab-v7-bold.ttf
│ │ │ │ ├── roboto-slab-v7-bold.woff
│ │ │ │ ├── roboto-slab-v7-bold.woff2
│ │ │ │ ├── roboto-slab-v7-regular.eot
│ │ │ │ ├── roboto-slab-v7-regular.ttf
│ │ │ │ ├── roboto-slab-v7-regular.woff
│ │ │ │ └── roboto-slab-v7-regular.woff2
│ │ │ ├── fontawesome-webfont.eot
│ │ │ ├── fontawesome-webfont.svg
│ │ │ ├── fontawesome-webfont.ttf
│ │ │ ├── fontawesome-webfont.woff
│ │ │ ├── fontawesome-webfont.woff2
│ │ │ ├── lato-bold-italic.woff
│ │ │ ├── lato-bold-italic.woff2
│ │ │ ├── lato-bold.woff
│ │ │ ├── lato-bold.woff2
│ │ │ ├── lato-normal-italic.woff
│ │ │ ├── lato-normal-italic.woff2
│ │ │ ├── lato-normal.woff
│ │ │ └── lato-normal.woff2
│ │ ├── jquery-3.5.1.js
│ │ ├── jquery.js
│ │ ├── js
│ │ │ ├── badge_only.js
│ │ │ ├── html5shiv-printshiv.min.js
│ │ │ ├── html5shiv.min.js
│ │ │ ├── modernizr.min.js
│ │ │ └── theme.js
│ │ ├── language_data.js
│ │ ├── minus.png
│ │ ├── plus.png
│ │ ├── pygments.css
│ │ ├── searchtools.js
│ │ ├── underscore-1.3.1.js
│ │ ├── underscore.js
│ │ ├── watson_transformer_logo.png
│ │ ├── watson_transformer_logo.svg
│ │ └── watson_transformer_stt_perf.png
│ │ ├── api.html
│ │ ├── development.html
│ │ ├── genindex.html
│ │ ├── index.html
│ │ ├── intro.html
│ │ ├── objects.inv
│ │ ├── search.html
│ │ ├── searchindex.js
│ │ └── tutorial.html
├── api.rst
├── conf.py
├── development.rst
├── index.rst
├── intro.rst
├── make.bat
├── misc
│ ├── Watson_Tranformer_Design.drawio
│ ├── Watson_Tranformer_Design.jpg
│ ├── Watson_Tranformer_Design.svg
│ ├── pipleline_benchmark.png
│ ├── regular_udf_vs_vectorized_udf_.png
│ ├── watson_transformer_logo.svg
│ ├── watson_transformer_logo_design.drawio
│ ├── watson_transformer_perf_full_pipeline.png
│ └── watson_transformer_stt_perf.png
└── tutorial.rst
├── requirements.txt
├── setup.py
├── src
└── watson_transformer
│ ├── __init__.py
│ ├── contrib
│ ├── __init__.py
│ ├── nlu
│ │ ├── __init__.py
│ │ └── default_nlu_parser.py
│ ├── readers.py
│ ├── response_base.py
│ └── stt
│ │ ├── __init__.py
│ │ └── default_stt_parser.py
│ ├── flat_column_transformer.py
│ ├── json_transformer.py
│ ├── service
│ ├── __init__.py
│ ├── nlu.py
│ ├── service_base.py
│ └── stt.py
│ └── watson_service_transformer.py
└── test
├── README.md
├── contrib
├── nlu
│ └── test_default_nlu_parser.py
├── stt
│ └── test_default_stt_parser.py
└── test_response_base.py
├── service
├── test_nlu.py
├── test_service_base.py
└── test_stt.py
├── test_json_transformer.py
└── test_watson_service_transformer.py
/.github/workflows/unit_test.yml:
--------------------------------------------------------------------------------
1 | # This workflow installs Python dependencies and runs the unit tests (with coverage) on a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: unit-test
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 | strategy:
17 | matrix:
18 | python-version: [3.6, 3.7]
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | - name: Set up Python ${{ matrix.python-version }}
23 | uses: actions/setup-python@v1
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | - name: Install dependencies
27 | run: |
28 | python -m pip install --upgrade pip
29 | pip install -r requirements.txt
30 | pip install pyspark
31 | pip install pandas
32 | pip install pytest
33 | pip install pytest-cov
34 | pip install -e .
35 | - name: Test with pytest
36 | run: |
37 | pip install pytest
38 | pytest --cov=src
39 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .*
2 | !/.gitignore
3 | !/.travis.yml
4 | /example/*
5 | /dist/*
6 | /build/*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Watson Transformer
2 |
3 | [![unit-test](https://github.com/kai-niu/watson-transformer/workflows/unit-test/badge.svg)](https://github.com/kai-niu/watson-transformer/actions)
4 |
5 | Watson Transformer solves the problem of consuming IBM Watson API services ([STT](https://cloud.ibm.com/apidocs/speech-to-text), [NLU](https://cloud.ibm.com/apidocs/natural-language-understanding), etc.) at scale by wrapping the service calls in a Spark transformer. In this way, IBM services such as STT and NLU can be built into a Spark ML pipeline, along with other transformers and estimators, to tackle big data challenges.
6 |
7 | # Install
8 | ```
9 | pip install watson-transformer
10 | ```
11 |
12 | # Design
13 | As the UML chart illustrates, the Watson Transformer class serves as a thin wrapper around the IBM Watson API class. For extensibility, the logic of consuming an API service is defined in the Watson Service class, which is an executable (callable) class. This enables any applicable API service to be wrapped into the transformer. The transformer, in turn, handles mapping input data to API calls and parsing the service response into data fields.
14 |
15 |
16 |
17 | # Performance
18 |
19 | * __Experiment 1__: This experiment compares the performance of the regular UDF and the vectorized UDF with pyArrow enabled. The testing cluster is provisioned with 10 2vCPU/2GB nodes, and the time cost is recorded on nine datasets, which contain [100,200,400,800,1600] recordings respectively. The maximum number of worker threads a vectorized UDF can spawn is 10; therefore, the maximum QPS (queries/sec) of the vectorized UDF transformer is 200.
20 |
21 |
22 |
23 | The result suggests:
24 | 1. Vectorized UDF: the time complexity is between **O(0.001N)** and **O(0.005N)** *N = total recording seconds in the dataset*
25 | 2. Regular UDF: the time complexity is slower than **O(0.01N)** *N = total recording seconds in the dataset*
26 | 3. Vectorized UDF is more than **10x** faster than the regular UDF; 1 clock second can process **~400** recording seconds.
27 |
28 |
29 |
30 |
31 | * __Experiment 2__: This experiment benchmarks the performance of pySpark ML pipelines built using several transformers provided by this package. The testing cluster is provisioned the same way as for the first experiment. The maximum QPS of the STT and NLU transformers is 200. Here is the configuration of the two ML pipelines:
32 | * STT pipeline: [STT => JSON_Transformer]
33 | * STT + NLU pipeline: [STT => JSON Transformer => NLU => JSON Transformer => Nested Column Transformer]
34 |
35 |
36 |
37 | The result suggests:
38 | 1. The STT transformer dominates the time cost in the whole pipeline.
39 | 2. The time complexity of the two pipelines is between **O(0.005N)** and **O(0.01N)**, *N = total recording seconds in the dataset*
40 | 3. 1 clock second can process **~400** recording seconds
41 |
42 |
43 |
44 | # Tutorial
45 |
46 | API documentation and tutorials are available [here](https://watson-transformer.readthedocs.io/en/latest/?)
47 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/api.rst.txt:
--------------------------------------------------------------------------------
1 | API Reference
2 | **************
3 |
4 | This page gives the API reference of Watson Transformer Package.
5 |
6 | WatsonServiceTransformer Class
7 | ===============================
8 |
9 | **WatsonServiceTransformer** (*inputCol=None, outputCol=None, vectorization=False, max_workers=5, service=None*)
10 |
11 | **Base**:
12 | - ``pySpark.ml.pipeline.Transformer``
13 | - ``pyspark.ml.param.shared.HasInputCol``
14 | - ``pyspark.ml.param.shared.HasOutputCol``
15 | - ``pyspark.ml.util.DefaultParamsReadable``
16 | - ``pyspark.ml.util.DefaultParamsWritable``
17 |
18 | **Parameters**:
19 | - **inputCol**: The name of the column used as input data. ``required``
20 | - **outputCol**: The name of the column that receives the transformed data. ``required``
21 | - **vectorization**: Whether to exploit pyArrow in-memory dataframes (vectorized UDFs). Enabling vectorization whenever possible is recommended. The default value is ``False``.
22 | - **max_workers**: When vectorization is enabled, the maximum number of threads that can be used to boost performance. The default value is ``5``.
23 | - **service**: The API service instance that is wrapped by the Watson Transformer. ``required``
24 |
25 | **Return**:
26 | ``WatsonServiceTransformer`` class instance
27 |
28 | **Return Type**:
29 | ``pySpark.ml.pipeline.Transformer``
30 |
31 | -----------------------------
32 |
33 | **- transform** (*dataframe*)
34 |
35 | **Parameters**:
36 | - **dataframe**: the pySpark dataframe to transform
37 |
38 | **Return**:
39 | A pySpark dataframe containing the transformation result
40 |
41 | **Return Type**:
42 | ``pyspark.sql.DataFrame``
43 |
44 |
45 | .. note::
46 |
47 | The ``WatsonServiceTransformer`` is a custom implementation of a pySpark transformer. The functions inherited from the
48 | pySpark transformer base classes are implemented and thus available for use.
49 |
50 |
51 | FlatColumnTransformer Class
52 | ============================
53 |
54 | **FlatColumnTransformer** (*inputCol=None*)
55 |
56 | **Base**:
57 | - ``pySpark.ml.pipeline.Transformer``
58 | - ``pyspark.ml.param.shared.HasInputCol``
59 | - ``pyspark.ml.util.DefaultParamsReadable``
60 | - ``pyspark.ml.util.DefaultParamsWritable``
61 |
62 | **Parameters**:
63 | - **inputCol**: The name of the column used as input data. ``required``
64 |
65 | **Return**:
66 | ``FlatColumnTransformer`` class instance
67 |
68 | **Return Type**:
69 | ``pySpark.ml.pipeline.Transformer``
70 |
71 | -----------------------------
72 |
73 | **- transform** (*dataframe*)
74 |
75 | **Parameters**:
76 | - **dataframe**: the pySpark dataframe to transform
77 |
78 | **Return**:
79 | A pySpark dataframe containing the flattened data from the input column
80 |
81 | **Return Type**:
82 | ``pyspark.sql.DataFrame``
83 |
84 |
85 | .. note::
86 |
87 | The ``FlatColumnTransformer`` is a custom implementation of a pySpark transformer. The functions inherited from the
88 | pySpark transformer base classes are implemented and thus available for use. This transformer
89 | flattens the nested input column into multiple regular data columns.
90 |
91 |
92 | JSONTransformer Class
93 | =====================
94 |
95 | **JSONTransformer** (*inputCol=None, outputCol=None, removeInputCol=False, parser=None*)
96 |
97 | **Base**:
98 | - ``pySpark.ml.pipeline.Transformer``
99 | - ``pyspark.ml.param.shared.HasInputCol``
100 | - ``pyspark.ml.param.shared.HasOutputCol``
101 | - ``pyspark.ml.util.DefaultParamsReadable``
102 | - ``pyspark.ml.util.DefaultParamsWritable``
103 |
104 | **Parameters**:
105 | - **inputCol**: The name of the column used as input data. ``required``
106 | - **outputCol**: The name of the column that receives the transformed data. ``required``
107 | - **removeInputCol**: Whether or not to remove the input column from the output dataframe. The default value is ``False``.
108 | - **parser**: The object that parses JSON data into data column(s). ``required``
109 |
110 | **Return**:
111 | ``JSONTransformer`` class instance
112 |
113 | **Return Type**:
114 | ``pySpark.ml.pipeline.Transformer``
115 |
116 | -----------------------------
117 |
118 | **- transform** (*dataframe*)
119 |
120 | **Parameters**:
121 | - **dataframe**: the pySpark dataframe to transform
122 |
123 | **Return**:
124 | A pySpark dataframe containing the transformation result
125 |
126 | **Return Type**:
127 | ``pyspark.sql.DataFrame``
128 |
129 |
130 | .. note::
131 |
132 | The ``JSONTransformer`` is a custom implementation of a pySpark transformer. The functions inherited from the
133 | pySpark transformer base classes are implemented and thus available for use.
134 |
135 |
136 |
137 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/development.rst.txt:
--------------------------------------------------------------------------------
1 | Development
2 | =============
3 |
4 | To set up a development environment and extend Watson Transformer support to other APIs ....
5 |
6 |
7 | Install Package in Dev Mode
8 | ---------------------------
9 |
10 | ::
11 |
12 | pip install watson-transformer
--------------------------------------------------------------------------------
/docs/_build/html/_sources/index.rst.txt:
--------------------------------------------------------------------------------
1 | .. watson transformer documentation master file, created by
2 | sphinx-quickstart on Mon Jul 27 22:03:12 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to watson transformer's documentation!
7 | ==============================================
8 |
9 | .. toctree::
10 | :maxdepth: 3
11 | :caption: Contents:
12 |
13 | intro
14 | tutorial
15 | api
16 | development
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/intro.rst.txt:
--------------------------------------------------------------------------------
1 | What is Watson Transformer?
2 | ===========================
3 | Watson Transformer solves the problem of consuming
4 | IBM Watson API services (STT, NLU, etc.) at scale by
5 | wrapping the service calls in a Spark transformer.
6 | In this way, IBM services such as STT and NLU can
7 | be built into a Spark ML pipeline, along with
8 | other transformers and estimators, to tackle big
9 | data challenges.
10 |
11 | The Design
12 | ----------
13 | As the UML chart illustrates, the Watson Transformer
14 | class serves as a thin wrapper around the IBM Watson
15 | API class. For extensibility, the logic of
16 | consuming an API service is defined in the Watson Service
17 | class, which is an executable (callable) class. This enables any
18 | applicable API service to be wrapped into the transformer.
19 | The transformer, in turn, handles mapping input
20 | data to API calls and parsing the service response into
21 | data fields.
--------------------------------------------------------------------------------
/docs/_build/html/_static/Watson_Tranformer_Design.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/Watson_Tranformer_Design.jpg
--------------------------------------------------------------------------------
/docs/_build/html/_static/Watson_Tranformer_Design.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 | «executable object»Return Type Factory
«executable object»... «executable object»Response Parser
«executable object»... API Response => Data Fields
API Response => Data... Instantiate Return Type Object
Instantiate Return Type O... Watson Transformer Class + inputCol: string + outputCol: string + service: Watson Service Class + transfrom(dataframe): dataframe Watson Service Class + token: string + endpoint: string + return_type: object + response_parser: object + **params: kv pairs + __call__(object): return_type Viewer does not support full SVG 1.1
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/badge_only.css:
--------------------------------------------------------------------------------
1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions 
.rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/lato-bold-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-bold-italic.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-bold.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-bold.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/lato-normal-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-normal-italic.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/lato-normal-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-normal-italic.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/lato-normal.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-normal.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/css/fonts/lato-normal.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-normal.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/custom.css:
--------------------------------------------------------------------------------
1 | /* This file intentionally left blank. */
2 |
--------------------------------------------------------------------------------
/docs/_build/html/_static/doctools.js:
--------------------------------------------------------------------------------
1 | /*
2 | * doctools.js
3 | * ~~~~~~~~~~~
4 | *
5 | * Sphinx JavaScript utilities for all documentation.
6 | *
7 | * :copyright: Copyright 2007-2020 by the Sphinx team, see AUTHORS.
8 | * :license: BSD, see LICENSE for details.
9 | *
10 | */
11 |
12 | /**
13 | * select a different prefix for underscore: the value returned by _.noConflict() is stored in `$u` (assigned without `var`, so it becomes a global)
14 | */
15 | $u = _.noConflict();
16 |
17 | /**
18 | * make the code below compatible with browsers without
19 | * an installed firebug like debugger
20 | if (!window.console || !console.firebug) {
21 | var names = ["log", "debug", "info", "warn", "error", "assert", "dir",
22 | "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace",
23 | "profile", "profileEnd"];
24 | window.console = {};
25 | for (var i = 0; i < names.length; ++i)
26 | window.console[names[i]] = function() {};
27 | }
28 | */
29 |
30 | /**
31 | * small helper function to urldecode strings
32 | */
33 | jQuery.urldecode = function(x) {
34 | return decodeURIComponent(x).replace(/\+/g, ' ');
35 | };
36 |
37 | /**
38 | * small helper function to urlencode strings (a direct alias of the built-in encodeURIComponent)
39 | */
40 | jQuery.urlencode = encodeURIComponent;
41 |
42 | /**
43 | * This function returns the parsed url parameters of the
44 | * current request. Multiple values per key are supported,
45 | * it will always return arrays of strings for the value parts.
46 | */
47 | jQuery.getQueryParameters = function(s) {
48 | if (typeof s === 'undefined')
49 | s = document.location.search;
50 | var parts = s.substr(s.indexOf('?') + 1).split('&');
51 | var result = {};
52 | for (var i = 0; i < parts.length; i++) {
53 | var tmp = parts[i].split('=', 2);
54 | var key = jQuery.urldecode(tmp[0]);
55 | var value = jQuery.urldecode(tmp[1]);
56 | if (key in result)
57 | result[key].push(value);
58 | else
59 | result[key] = [value];
60 | }
61 | return result;
62 | };
63 |
64 | /**
65 | * highlight a given string on a jquery object by wrapping it in
66 | * span elements with the given class name (inside SVG a tspan is used instead and a rect carrying the class is queued for insertion).
67 | * `text` is searched in the lower-cased node text, so it is compared as given; returns the result of this.each().
68 | */
68 | jQuery.fn.highlightText = function(text, className) {
69 | function highlight(node, addItems) { // recursive walker; addItems collects SVG rects to insert after the walk
70 | if (node.nodeType === 3) { // 3 = text node
71 | var val = node.nodeValue;
72 | var pos = val.toLowerCase().indexOf(text); // only the node text is lower-cased here
73 | if (pos >= 0 &&
74 | !jQuery(node.parentNode).hasClass(className) && // parent already carries the highlight class: skip
75 | !jQuery(node.parentNode).hasClass("nohighlight")) { // parent opted out via "nohighlight": skip
76 | var span;
77 | var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); // true when the nearest of these ancestors is an svg element
78 | if (isInSVG) {
79 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); // no class is set on the tspan; the rect below gets it
80 | } else {
81 | span = document.createElement("span");
82 | span.className = className;
83 | }
84 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); // the matched slice, in its original case
85 | node.parentNode.insertBefore(span, node.parentNode.insertBefore( // inner call inserts the trailing text, outer call puts the span before it
86 | document.createTextNode(val.substr(pos + text.length)),
87 | node.nextSibling));
88 | node.nodeValue = val.substr(0, pos); // the original node keeps only the text before the match
89 | if (isInSVG) {
90 | var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
91 | var bbox = node.parentElement.getBBox();
92 | rect.x.baseVal.value = bbox.x; // rect copies the bounding box of the text node's parent element
93 | rect.y.baseVal.value = bbox.y;
94 | rect.width.baseVal.value = bbox.width;
95 | rect.height.baseVal.value = bbox.height;
96 | rect.setAttribute('class', className);
97 | addItems.push({ // queued rather than inserted now; insertion happens in the loop after the walk
98 | "parent": node.parentNode,
99 | "target": rect});
100 | }
101 | }
102 | }
103 | else if (!jQuery(node).is("button, select, textarea")) { // do not descend into these form controls
104 | jQuery.each(node.childNodes, function() {
105 | highlight(this, addItems);
106 | });
107 | }
108 | }
109 | var addItems = [];
110 | var result = this.each(function() {
111 | highlight(this, addItems);
112 | });
113 | for (var i = 0; i < addItems.length; ++i) {
114 | jQuery(addItems[i].parent).before(addItems[i].target); // each queued rect goes immediately before its recorded parent
115 | }
116 | return result;
117 | };
118 |
119 | /*
120 | * backward compatibility for jQuery.browser
121 | * This will be supported until firefox bug is fixed.
122 | */
if (!jQuery.browser) {
  /**
   * Derive a browser name and version from a user-agent string.
   *
   * The patterns are tried in order (chrome, webkit, opera, msie); the
   * mozilla pattern is only consulted when none of them matched and the
   * string does not contain "compatible".
   *
   * @param {string} ua - user-agent string (lower-cased before matching)
   * @returns {{browser: string, version: string}} `browser` is "" and
   *   `version` is "0" when the corresponding group did not match.
   */
  jQuery.uaMatch = function(ua) {
    ua = ua.toLowerCase();

    var patterns = [
      /(chrome)[ \/]([\w.]+)/,
      /(webkit)[ \/]([\w.]+)/,
      /(opera)(?:.*version|)[ \/]([\w.]+)/,
      /(msie) ([\w.]+)/
    ];

    var match = null;
    for (var i = 0; i < patterns.length && !match; ++i) {
      match = patterns[i].exec(ua);
    }
    if (!match && ua.indexOf("compatible") < 0) {
      match = /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua);
    }
    if (!match) {
      match = [];
    }

    return {
      browser: match[ 1 ] || "",
      version: match[ 2 ] || "0"
    };
  };

  // Recreate the legacy flag object: a single key named after the detected
  // browser (an empty-string key when nothing matched) set to true.
  jQuery.browser = {};
  jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
}
142 |
143 | /**
144 | * Small JavaScript module for the documentation.
145 | */
var Documentation = {

  /**
   * Run the page-load setup steps. Keyboard navigation is only wired up
   * when DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS is truthy.
   */
  init : function() {
    this.fixFirefoxAnchorBug();
    this.highlightSearchWords();
    this.initIndexTable();
    if (DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) {
      this.initOnKeyListeners();
    }
  },

  /**
   * i18n support
   *
   * TRANSLATIONS maps a source string to either a translated string or an
   * array of plural forms; PLURAL_EXPR maps a count to an index into such
   * an array (default: 0 for exactly one, 1 otherwise).
   */
  TRANSLATIONS : {},
  PLURAL_EXPR : function(n) { return n === 1 ? 0 : 1; },
  LOCALE : 'unknown',

  // gettext and ngettext don't access `this` so that the functions
  // can safely be bound to a different name (_ = Documentation.gettext)

  /**
   * Look up the translation of `string`.
   *
   * @returns the translated string (the first plural form when the catalog
   *   entry is an array), or `string` itself when there is no entry.
   */
  gettext : function(string) {
    var translated = Documentation.TRANSLATIONS[string];
    if (typeof translated === 'undefined')
      return string;
    return (typeof translated === 'string') ? translated : translated[0];
  },

  /**
   * Look up the plural-aware translation keyed by `singular`.
   *
   * @returns the catalog form selected by PLURAL_EXPR(n), or, without a
   *   catalog entry, `singular` when n == 1 and `plural` otherwise.
   */
  ngettext : function(singular, plural, n) {
    var translated = Documentation.TRANSLATIONS[singular];
    if (typeof translated === 'undefined')
      return (n == 1) ? singular : plural;
    // The property is PLURAL_EXPR; the previous spelling "PLURALEXPR" was
    // undefined and threw as soon as a translated plural was requested.
    return translated[Documentation.PLURAL_EXPR(n)];
  },

  /**
   * Merge a translation catalog ({messages, plural_expr, locale}) into the
   * module state.
   */
  addTranslations : function(catalog) {
    for (var key in catalog.messages)
      this.TRANSLATIONS[key] = catalog.messages[key];
    // NOTE(review): plural_expr is compiled with the Function constructor,
    // i.e. evaluated as code. Only load catalogs from a trusted source.
    this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')');
    this.LOCALE = catalog.locale;
  },

  /**
   * add context elements like header anchor links
   */
  addContextElements : function() {
    $('div[id] > :header:first').each(function() {
      $('<a class="headerlink">\u00B6</a>').
      attr('href', '#' + this.id).
      attr('title', _('Permalink to this headline')).
      appendTo(this);
    });
    $('dt[id]').each(function() {
      $('<a class="headerlink">\u00B6</a>').
      attr('href', '#' + this.id).
      attr('title', _('Permalink to this definition')).
      appendTo(this);
    });
  },

  /**
   * workaround a firefox stupidity
   * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075
   *
   * When the URL has a hash and $.browser.mozilla is set, the location is
   * re-assigned to itself after a 10 ms delay.
   */
  fixFirefoxAnchorBug : function() {
    if (document.location.hash && $.browser.mozilla)
      window.setTimeout(function() {
        document.location.href += '';
      }, 10);
  },

  /**
   * highlight the search words provided in the url in the text
   *
   * Terms come from the first `highlight` query parameter, split on
   * whitespace. When at least one term is present, a "Hide Search Matches"
   * link is appended to #searchbox.
   */
  highlightSearchWords : function() {
    var params = $.getQueryParameters();
    var rawTerms = (params.highlight) ? params.highlight[0].split(/\s+/) : [];
    // Drop empty pieces (e.g. "?highlight=" or leading/trailing blanks):
    // an empty needle matches at position 0 of every text node.
    var terms = [];
    for (var i = 0; i < rawTerms.length; ++i) {
      if (rawTerms[i]) {
        terms.push(rawTerms[i]);
      }
    }
    if (terms.length) {
      var body = $('div.body');
      if (!body.length) {
        body = $('body');
      }
      window.setTimeout(function() {
        $.each(terms, function() {
          body.highlightText(this.toLowerCase(), 'highlighted');
        });
      }, 10);
      $('<p class="highlight-link"><a href="javascript:Documentation.' +
        'hideSearchWords()">' + _('Hide Search Matches') + '</a></p>')
          .appendTo($('#searchbox'));
    }
  },

  /**
   * init the domain index toggle buttons
   *
   * Each img.toggler toggles the rows with class "cg-<n>", where <n> is the
   * image id with its first seven characters removed (presumably a
   * "toggle-" prefix; verify against the generated index markup), and swaps
   * its own src between "...minus.png" and "...plus.png".
   */
  initIndexTable : function() {
    var togglers = $('img.toggler').click(function() {
      var src = $(this).attr('src');
      var idnum = $(this).attr('id').substr(7);
      $('tr.cg-' + idnum).toggle();
      if (src.substr(-9) === 'minus.png')
        $(this).attr('src', src.substr(0, src.length-9) + 'plus.png');
      else
        $(this).attr('src', src.substr(0, src.length-8) + 'minus.png');
    }).css('display', '');
    if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) {
      togglers.click();
    }
  },

  /**
   * helper function to hide the search marks again
   */
  hideSearchWords : function() {
    $('#searchbox .highlight-link').fadeOut(300);
    $('span.highlighted').removeClass('highlighted');
  },

  /**
   * make the url absolute: prefix it with DOCUMENTATION_OPTIONS.URL_ROOT
   * and a slash
   */
  makeURL : function(relativeURL) {
    return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL;
  },

  /**
   * get the current relative url
   *
   * One trailing path segment is dropped for every ".." component of
   * URL_ROOT; the result is the part of the pathname that follows the last
   * "/" of the shortened path, without the pathname's final character.
   */
  getCurrentURL : function() {
    var path = document.location.pathname;
    var parts = path.split(/\//);
    $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() {
      if (this === '..')
        parts.pop();
    });
    var url = parts.join('/');
    return path.substring(url.lastIndexOf('/') + 1, path.length - 1);
  },

  /**
   * Navigate to the page referenced by link[rel="prev"] / link[rel="next"]
   * on the left / right arrow key. Keys are ignored while a textarea, input
   * or select has focus, or when a modifier key is held.
   */
  initOnKeyListeners: function() {
    $(document).keydown(function(event) {
      var activeElementType = document.activeElement.tagName;
      // don't navigate when in search box or textarea
      if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT'
          && !event.altKey && !event.ctrlKey && !event.metaKey && !event.shiftKey) {
        switch (event.keyCode) {
          case 37: // left
            var prevHref = $('link[rel="prev"]').prop('href');
            if (prevHref) {
              window.location.href = prevHref;
              return false;
            }
            // Without this break a left-arrow press on a page that has no
            // previous link fell through and navigated to the next page.
            break;
          case 39: // right
            var nextHref = $('link[rel="next"]').prop('href');
            if (nextHref) {
              window.location.href = nextHref;
              return false;
            }
            break;
        }
      }
    });
  }
};
309 |
// Short global alias for the translation lookup; gettext does not use
// `this`, so it can be called unbound.
_ = Documentation.gettext;

// Run the documentation setup once the DOM is ready.
$(function() {
  Documentation.init();
});
316 |
--------------------------------------------------------------------------------
/docs/_build/html/_static/documentation_options.js:
--------------------------------------------------------------------------------
/**
 * Options consumed by the documentation scripts. URL_ROOT is read at load
 * time from the `data-url_root` attribute of the element whose id is
 * "documentation_options"; every other entry is a fixed literal.
 */
var DOCUMENTATION_OPTIONS = (function() {
    var optionsElement = document.getElementById("documentation_options");
    return {
        URL_ROOT: optionsElement.getAttribute('data-url_root'),
        VERSION: '',
        LANGUAGE: 'None',
        COLLAPSE_INDEX: false,
        BUILDER: 'html',
        FILE_SUFFIX: '.html',
        LINK_SUFFIX: '.html',
        HAS_SOURCE: true,
        SOURCELINK_SUFFIX: '.txt',
        NAVIGATION_WITH_KEYS: false
    };
})();
--------------------------------------------------------------------------------
/docs/_build/html/_static/file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/file.png
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/FontAwesome.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/FontAwesome.otf
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bold.eot
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bold.ttf
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bold.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bold.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-bolditalic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bolditalic.eot
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-bolditalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bolditalic.ttf
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-italic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-italic.eot
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-italic.ttf
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-italic.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-italic.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-regular.eot
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-regular.ttf
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-regular.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Lato/lato-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-regular.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Roboto-Slab-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Bold.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Roboto-Slab-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Bold.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Roboto-Slab-Light.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Light.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Roboto-Slab-Light.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Light.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Roboto-Slab-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Regular.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Roboto-Slab-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Regular.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Roboto-Slab-Thin.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Thin.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/Roboto-Slab-Thin.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Thin.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/fontawesome-webfont.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/lato-bold-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-bold-italic.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/lato-bold-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-bold-italic.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-bold.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-bold.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/lato-normal-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-normal-italic.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/lato-normal-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-normal-italic.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/lato-normal.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-normal.woff
--------------------------------------------------------------------------------
/docs/_build/html/_static/fonts/lato-normal.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-normal.woff2
--------------------------------------------------------------------------------
/docs/_build/html/_static/js/badge_only.js:
--------------------------------------------------------------------------------
/*
 * Module-loader bootstrap for a bundle whose only module (id 4) has an
 * empty body. Loading the file defines the loader, requires module 4 and
 * has no further effect.
 */
!function(modules) {
  // Cache of modules that have already been instantiated, keyed by id.
  var installed = {};

  // Return the exports of module `id`, running its factory on first use.
  function load(id) {
    if (installed[id]) {
      return installed[id].exports;
    }
    var record = installed[id] = { i: id, l: false, exports: {} };
    modules[id].call(record.exports, record, record.exports, load);
    record.l = true;
    return record.exports;
  }

  load.m = modules;
  load.c = installed;

  // Define an enumerable getter on `target` unless it already owns `name`.
  load.d = function(target, name, getter) {
    if (!load.o(target, name)) {
      Object.defineProperty(target, name, { enumerable: true, get: getter });
    }
  };

  // Tag `target` as an ES module.
  load.r = function(target) {
    if ("undefined" != typeof Symbol && Symbol.toStringTag) {
      Object.defineProperty(target, Symbol.toStringTag, { value: "Module" });
    }
    Object.defineProperty(target, "__esModule", { value: true });
  };

  // Build a namespace object for `value`; `mode` is a bit mask:
  //   1 - `value` is a module id and is loaded first
  //   8 - return the (possibly loaded) value as is
  //   4 - return `value` directly when it already is an ES module object
  //   2 - copy all properties of `value` onto the namespace as getters
  load.t = function(value, mode) {
    if (1 & mode) {
      value = load(value);
    }
    if (8 & mode) {
      return value;
    }
    if (4 & mode && "object" == typeof value && value && value.__esModule) {
      return value;
    }
    var ns = Object.create(null);
    load.r(ns);
    Object.defineProperty(ns, "default", { enumerable: true, value: value });
    if (2 & mode && "string" != typeof value) {
      for (var key in value) {
        load.d(ns, key, function(k) { return value[k]; }.bind(null, key));
      }
    }
    return ns;
  };

  // Return a getter for the default export (ES module) or the module itself.
  load.n = function(mod) {
    var getter = mod && mod.__esModule
      ? function() { return mod.default; }
      : function() { return mod; };
    load.d(getter, "a", getter);
    return getter;
  };

  load.o = function(object, property) {
    return Object.prototype.hasOwnProperty.call(object, property);
  };

  load.p = "";

  return load(load.s = 4);
}({
  4: function(module, exports, load) {}
});
--------------------------------------------------------------------------------
/docs/_build/html/_static/js/html5shiv-printshiv.min.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
3 | */
/*
 * Minified HTML5 Shiv build with print support. It publishes its API object
 * as `html5` on the global passed in as `a`, shivs the document passed in as
 * `b`, and registers onbeforeprint/onafterprint handlers via attachEvent
 * when the required legacy APIs are detected.
 * Two string literals are spelled out in full here: the <style> wrapper that
 * injects the CSS text in c(), and the <xyz> probe element used to detect
 * support for unknown elements.
 */
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var
b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof
b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document);
--------------------------------------------------------------------------------
/docs/_build/html/_static/js/html5shiv.min.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
3 | */
/*
 * Minified HTML5 Shiv build. It publishes its API object as `html5` on the
 * global passed in as `a` and shivs the document passed in as `b`.
 * Two string literals are spelled out in full here: the <style> wrapper that
 * injects the CSS text in c(), and the <xyz> probe element used to detect
 * support for unknown elements.
 */
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof
a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document);
--------------------------------------------------------------------------------
/docs/_build/html/_static/js/theme.js:
--------------------------------------------------------------------------------
1 | !function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var 
t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap(""),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(' '),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}t.length>0&&($(".wy-menu-vertical .current").removeClass("current"),t.addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l1").parent().addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l2").addClass("current"),t.closest("li.toctree-l3").addClass("current"),t.closest("li.toctree-l4").addClass("current"),t.closest("li.toctree-l5").addClass("current"),t[0].scrollIntoView())}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t2;a==
12 | null&&(a=[]);if(y&&a.reduce===y)return e&&(c=b.bind(c,e)),f?a.reduce(c,d):a.reduce(c);j(a,function(a,b,i){f?d=c.call(e,d,a,b,i):(d=a,f=true)});if(!f)throw new TypeError("Reduce of empty array with no initial value");return d};b.reduceRight=b.foldr=function(a,c,d,e){var f=arguments.length>2;a==null&&(a=[]);if(z&&a.reduceRight===z)return e&&(c=b.bind(c,e)),f?a.reduceRight(c,d):a.reduceRight(c);var g=b.toArray(a).reverse();e&&!f&&(c=b.bind(c,e));return f?b.reduce(g,c,d,e):b.reduce(g,c)};b.find=b.detect=
13 | function(a,c,b){var e;E(a,function(a,g,h){if(c.call(b,a,g,h))return e=a,true});return e};b.filter=b.select=function(a,c,b){var e=[];if(a==null)return e;if(A&&a.filter===A)return a.filter(c,b);j(a,function(a,g,h){c.call(b,a,g,h)&&(e[e.length]=a)});return e};b.reject=function(a,c,b){var e=[];if(a==null)return e;j(a,function(a,g,h){c.call(b,a,g,h)||(e[e.length]=a)});return e};b.every=b.all=function(a,c,b){var e=true;if(a==null)return e;if(B&&a.every===B)return a.every(c,b);j(a,function(a,g,h){if(!(e=
14 | e&&c.call(b,a,g,h)))return n});return e};var E=b.some=b.any=function(a,c,d){c||(c=b.identity);var e=false;if(a==null)return e;if(C&&a.some===C)return a.some(c,d);j(a,function(a,b,h){if(e||(e=c.call(d,a,b,h)))return n});return!!e};b.include=b.contains=function(a,c){var b=false;if(a==null)return b;return p&&a.indexOf===p?a.indexOf(c)!=-1:b=E(a,function(a){return a===c})};b.invoke=function(a,c){var d=i.call(arguments,2);return b.map(a,function(a){return(b.isFunction(c)?c||a:a[c]).apply(a,d)})};b.pluck=
15 | function(a,c){return b.map(a,function(a){return a[c]})};b.max=function(a,c,d){if(!c&&b.isArray(a))return Math.max.apply(Math,a);if(!c&&b.isEmpty(a))return-Infinity;var e={computed:-Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;b>=e.computed&&(e={value:a,computed:b})});return e.value};b.min=function(a,c,d){if(!c&&b.isArray(a))return Math.min.apply(Math,a);if(!c&&b.isEmpty(a))return Infinity;var e={computed:Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;bd?1:0}),"value")};b.groupBy=function(a,c){var d={},e=b.isFunction(c)?c:function(a){return a[c]};j(a,function(a,b){var c=e(a,b);(d[c]||(d[c]=[])).push(a)});return d};b.sortedIndex=function(a,
17 | c,d){d||(d=b.identity);for(var e=0,f=a.length;e>1;d(a[g])=0})})};b.difference=function(a){var c=b.flatten(i.call(arguments,1));return b.filter(a,function(a){return!b.include(c,a)})};b.zip=function(){for(var a=i.call(arguments),c=b.max(b.pluck(a,"length")),d=Array(c),e=0;e=0;d--)b=[a[d].apply(this,b)];return b[0]}};
24 | b.after=function(a,b){return a<=0?b():function(){if(--a<1)return b.apply(this,arguments)}};b.keys=J||function(a){if(a!==Object(a))throw new TypeError("Invalid object");var c=[],d;for(d in a)b.has(a,d)&&(c[c.length]=d);return c};b.values=function(a){return b.map(a,b.identity)};b.functions=b.methods=function(a){var c=[],d;for(d in a)b.isFunction(a[d])&&c.push(d);return c.sort()};b.extend=function(a){j(i.call(arguments,1),function(b){for(var d in b)a[d]=b[d]});return a};b.defaults=function(a){j(i.call(arguments,
25 | 1),function(b){for(var d in b)a[d]==null&&(a[d]=b[d])});return a};b.clone=function(a){return!b.isObject(a)?a:b.isArray(a)?a.slice():b.extend({},a)};b.tap=function(a,b){b(a);return a};b.isEqual=function(a,b){return q(a,b,[])};b.isEmpty=function(a){if(b.isArray(a)||b.isString(a))return a.length===0;for(var c in a)if(b.has(a,c))return false;return true};b.isElement=function(a){return!!(a&&a.nodeType==1)};b.isArray=o||function(a){return l.call(a)=="[object Array]"};b.isObject=function(a){return a===Object(a)};
26 | b.isArguments=function(a){return l.call(a)=="[object Arguments]"};if(!b.isArguments(arguments))b.isArguments=function(a){return!(!a||!b.has(a,"callee"))};b.isFunction=function(a){return l.call(a)=="[object Function]"};b.isString=function(a){return l.call(a)=="[object String]"};b.isNumber=function(a){return l.call(a)=="[object Number]"};b.isNaN=function(a){return a!==a};b.isBoolean=function(a){return a===true||a===false||l.call(a)=="[object Boolean]"};b.isDate=function(a){return l.call(a)=="[object Date]"};
27 | b.isRegExp=function(a){return l.call(a)=="[object RegExp]"};b.isNull=function(a){return a===null};b.isUndefined=function(a){return a===void 0};b.has=function(a,b){return I.call(a,b)};b.noConflict=function(){r._=G;return this};b.identity=function(a){return a};b.times=function(a,b,d){for(var e=0;e /g,">").replace(/"/g,""").replace(/'/g,"'").replace(/\//g,"/")};b.mixin=function(a){j(b.functions(a),
28 | function(c){K(c,b[c]=a[c])})};var L=0;b.uniqueId=function(a){var b=L++;return a?a+b:b};b.templateSettings={evaluate:/<%([\s\S]+?)%>/g,interpolate:/<%=([\s\S]+?)%>/g,escape:/<%-([\s\S]+?)%>/g};var t=/.^/,u=function(a){return a.replace(/\\\\/g,"\\").replace(/\\'/g,"'")};b.template=function(a,c){var d=b.templateSettings,d="var __p=[],print=function(){__p.push.apply(__p,arguments);};with(obj||{}){__p.push('"+a.replace(/\\/g,"\\\\").replace(/'/g,"\\'").replace(d.escape||t,function(a,b){return"',_.escape("+
29 | u(b)+"),'"}).replace(d.interpolate||t,function(a,b){return"',"+u(b)+",'"}).replace(d.evaluate||t,function(a,b){return"');"+u(b).replace(/[\r\n\t]/g," ")+";__p.push('"}).replace(/\r/g,"\\r").replace(/\n/g,"\\n").replace(/\t/g,"\\t")+"');}return __p.join('');",e=new Function("obj","_",d);return c?e(c,b):function(a){return e.call(this,a,b)}};b.chain=function(a){return b(a).chain()};var m=function(a){this._wrapped=a};b.prototype=m.prototype;var v=function(a,c){return c?b(a).chain():a},K=function(a,c){m.prototype[a]=
30 | function(){var a=i.call(arguments);H.call(a,this._wrapped);return v(c.apply(b,a),this._chain)}};b.mixin(b);j("pop,push,reverse,shift,sort,splice,unshift".split(","),function(a){var b=k[a];m.prototype[a]=function(){var d=this._wrapped;b.apply(d,arguments);var e=d.length;(a=="shift"||a=="splice")&&e===0&&delete d[0];return v(d,this._chain)}});j(["concat","join","slice"],function(a){var b=k[a];m.prototype[a]=function(){return v(b.apply(this._wrapped,arguments),this._chain)}});m.prototype.chain=function(){this._chain=
31 | true;return this};m.prototype.value=function(){return this._wrapped}}).call(this);
32 |
--------------------------------------------------------------------------------
/docs/_build/html/_static/watson_transformer_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/watson_transformer_logo.png
--------------------------------------------------------------------------------
/docs/_build/html/_static/watson_transformer_stt_perf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/watson_transformer_stt_perf.png
--------------------------------------------------------------------------------
/docs/_build/html/development.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | Development — watson tranfromer documentation
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 | watson tranfromer
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 | »
141 |
142 | Development
143 |
144 |
145 |
146 |
147 |
148 | View page source
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
Development
163 |
To setup development envrioment and extend support Watson Transformer API to other API ….
164 |
165 |
Install Pacakge in Dev Mode
166 |
pip install watson - transfromer
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
--------------------------------------------------------------------------------
/docs/_build/html/genindex.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | Index — watson tranfromer documentation
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 | watson tranfromer
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 | »
137 |
138 | Index
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
Index
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
--------------------------------------------------------------------------------
/docs/_build/html/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | Welcome to watson tranfromer’s documentation! — watson tranfromer documentation
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 | watson tranfromer
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 | »
138 |
139 | Welcome to watson tranfromer’s documentation!
140 |
141 |
142 |
143 |
144 |
145 | View page source
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
Welcome to watson tranfromer’s documentation!
160 |
161 |
Contents:
162 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
--------------------------------------------------------------------------------
/docs/_build/html/intro.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | What is Watson Transformer? — watson tranfromer documentation
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 | watson tranfromer
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 | »
142 |
143 | What is Watson Transformer?
144 |
145 |
146 |
147 |
148 |
149 | View page source
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
184 |
185 |
186 |
187 |
188 |
189 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
--------------------------------------------------------------------------------
/docs/_build/html/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/objects.inv
--------------------------------------------------------------------------------
/docs/_build/html/search.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | Search — watson tranfromer documentation
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 | watson tranfromer
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 | »
139 |
140 | Search
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 | Please activate JavaScript to enable the search functionality.
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
209 |
210 |
211 |
212 |
213 |
214 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
--------------------------------------------------------------------------------
/docs/_build/html/searchindex.js:
--------------------------------------------------------------------------------
1 | Search.setIndex({docnames:["api","development","index","intro","tutorial"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":1,sphinx:56},filenames:["api.rst","development.rst","index.rst","intro.rst","tutorial.rst"],objects:{},objnames:{},objtypes:{},terms:{"2rmb6z02rhvoo1robbiegliqwovi1orqx2z7v8":[],"case":4,"class":[2,3,4],"default":0,"function":[0,4],"import":4,"int":4,"long":4,"new":4,"return":[0,4],"true":4,For:[3,4],The:[0,2],There:4,__iter__:4,abc:[],abl:4,about:4,access:4,achiev:4,address:4,after:4,all:4,allow:4,almost:4,along:3,also:[],alwai:4,ani:3,api:[1,2,3,4],api_kei:4,appli:4,applic:3,around:[3,4],audio:4,audio_fil:4,audio_filenam:4,audio_stream:4,auth_endpoint:4,avaiabl:[0,4],avail:4,base:0,beblow:4,been:0,below:4,best:4,big:3,bodi:4,boost:0,bucket:4,bufferediobas:4,build:[3,4],call:[3,4],can:[0,3,4],challeng:3,chart:3,client:4,cloud:4,cluster:4,code:4,colulmn:[],column:[0,2],column_flatt:4,columnar:4,common:[],compar:4,compli:4,concept:4,conceptsopt:4,config:4,configur:4,consum:3,contain:[0,4],content:2,content_typ:4,contentlength:4,contrib:4,convert:4,copi:4,cos_client:4,cover:[],creat:2,custom:[0,4],data:[0,3,4],datafram:[0,4],declar:4,def:4,defaultparamsread:0,defaultparamswrit:0,defaultsttpars:4,defin:3,deliv:4,denot:4,design:2,detail:4,dev:2,develop:[2,4],devlop:4,differ:4,discuss:4,document:4,down:4,downstream:[],each:4,els:4,emot:4,emotionopt:4,enabl:[0,3,4],end:[],endpoint:4,endpoint_url:4,envrioment:1,essenti:2,estim:3,etc:3,exampl:4,execut:[3,4],exploit:[0,4],expressivli:4,extend:1,extens:[3,4],fairli:4,fals:[0,4],featur:4,field:3,file:4,file_obj:4,first:4,flat:2,flatcolumntransform:[2,4],flatten:0,follow:4,form:4,forward:4,frist:4,from:[0,4],gatewai:4,gener:4,get:4,get_object:4,give:0,given:4,good:4,greater:4,hand:3,handl:3,ha
s:4,hasattr:4,hasinputcol:0,hasoutputcol:0,have:[0,4],hello:[],here:4,how:4,http:4,ibm:[2,3],ibm_api_key_id:4,ibm_auth_endpoint:4,ibm_boto3:4,ibm_cos_read:4,ibm_watson:4,ibn:4,ignor:4,illustr:3,implement:0,independ:4,indic:4,individu:4,inherit:0,init:[],initi:4,input:[0,3,4],inputcol:[0,4],insid:4,instal:2,instanc:[0,2],introduc:4,its:4,json:[0,2],jsontransform:[2,4],jt_stt:[],kei:4,keyword:4,keywordsopt:4,languag:4,last:4,learn:4,less:[],limit:4,list:4,logic:[3,4],make:[],mani:4,map:3,max_altern:4,max_work:[0,4],maximum:[0,4],maximum_s:4,memori:[0,4],method:[],methodtyp:4,mode:2,model:4,more:4,multipl:[0,4],name:[0,4],natur:4,natural_language_understanding_v1:4,need:4,nest:[0,4],net:4,nlu:[2,3],nlu_access_token:4,nlu_respons:4,nlu_result:4,nlu_servic:4,node:4,none:[0,4],now:[],number:[0,4],oauth:4,object:[0,4],onc:4,order:4,other:[1,3,4],out:4,output:[0,4],outputcol:[0,4],over:4,own:4,pacakg:2,packag:[0,2],page:[0,4],param1:4,param:[0,4],paramet:[0,4],paramt:4,pars:[0,3,4],parser:[0,4],pass:4,perform:[0,4],pip:[1,4],pipelin:[0,3,4],pipeline_stt:4,plain:4,pleas:4,point:4,possibl:[0,4],previou:4,problem:3,process:4,profanity_filt:4,promot:4,prompot:[],provid:4,public_endpoint_url:4,purpos:[3,4],pyarrow:[0,4],pyspark:[0,4],quick:4,read:4,reader:4,reader_funct:4,readi:4,reason:4,reciev:0,recommend:0,record:4,refer:[2,4],regular:0,relev:4,remov:0,removeinputcol:[0,4],requir:[0,4],respons:3,resuabl:4,result:[0,4],right:4,same:4,save:4,scale:3,section:4,sentiment:4,sentimentopt:4,serv:4,servic:[0,2,3],service_nam:4,session:4,set:4,setup:1,share:0,should:4,show:4,signatur:4,signature_vers:4,sinc:4,size:4,snippet:4,solv:3,some:4,sort:4,spark:3,spawn:4,specif:4,speech:4,split_transcript_at_phrase_end:4,sql:0,stage:4,standard:4,start:4,step:4,storag:4,straight:4,straightforward:4,stream:4,stright:4,structur:4,stt:[2,3],stt_api_token:4,stt_respons:4,stt_result_pars:4,stt_servic:4,support:[1,4],sure:[],tackl:3,take:4,technic:4,text:4,than:4,thei:4,thi:[0,3,4],thin:3,thread:[0,4
],thu:0,token:4,tranform:[],transcrib:4,transcript:4,transform:[0,1,2],transfrom:[1,4],translat:4,tutori:2,two:4,type:[0,4],uml:3,understand:4,url:4,us_shortform_narrowbandmodel:4,use:[0,4],used:4,using:4,util:[0,2],valu:0,value1:4,vector:[0,4],w_bnu:[],wai:[3,4],waston:[3,4],watson:[0,1],watson_transform:4,watsonplatform:4,watsonservicetransform:[2,4],watsontransform:4,wav:4,what:2,when:[0,4],whenev:[0,4],whether:[0,4],which:[0,3,4],within:4,work:4,world:[],wrap:[0,3,4],wrapper:3,write:4,your:4},titles:["API Reference","Development","Welcome to watson tranfromer\u2019s documentation!","What is Watson Transformer?","Tutorial"],titleterms:{"class":0,The:[3,4],api:0,column:4,creat:4,design:3,dev:1,develop:1,document:2,essenti:4,flat:4,flatcolumntransform:0,ibm:4,init:[],initi:[],instal:[1,4],instanc:4,json:4,jsontransform:0,mode:1,nlu:4,pacakg:1,packag:4,parser:[],refer:0,result:[],servic:4,speech:[],stt:4,text:[],tranfrom:2,transform:[3,4],tutori:4,util:4,watson:[2,3,4],watsonservicetransform:0,welcom:2,what:3}})
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | API Reference
2 | **************
3 |
4 | This page gives the API reference of Watson Transformer Package.
5 |
6 | WatsonServiceTransformer Class
7 | ===============================
8 |
9 | **WatsonServiceTransformer** (*inputCol=None, outputCol=None, vectorization=False, max_workers=5, service=None*)
10 |
11 | **Base**:
12 | - ``pyspark.ml.pipeline.Transformer``
13 | - ``pyspark.ml.param.shared.HasInputCol``
14 | - ``pyspark.ml.param.shared.HasOutputCol``
15 | - ``pyspark.ml.util.DefaultParamsReadable``
16 | - ``pyspark.ml.util.DefaultParamsWritable``
17 |
18 | **Parameters**:
19 | - **inputCol**: The name of the column used as input data. ``required``
20 | - **outputCol**: The name of the column that receives the transformed data. ``required``
21 | - **vectorization**: Whether to exploit the pyArrow in-memory dataframe. Enabling vectorization whenever possible is recommended. The default value is ``False``.
22 | - **max_workers**: When vectorization is enabled, the maximum number of threads that can be used to boost performance. The default value is ``5``.
23 | - **service**: The API service instance that is wrapped by the Watson Transformer. ``required``
24 |
25 | **Return**:
26 | ``WatsonServiceTransformer`` class instance
27 |
28 | **Return Type**:
29 | ``pyspark.ml.pipeline.Transformer``
30 |
31 | -----------------------------
32 |
33 | **- transform** (*dataframe*)
34 |
35 | **Parameters**:
36 | - **dataframe**: the pySpark dataframe to be transformed
37 |
38 | **Return**:
39 | A pySpark dataframe containing the transformation result
40 |
41 | **Return Type**:
42 | ``pyspark.sql.DataFrame``
43 |
44 |
45 | .. note::
46 |
47 | The ``WatsonServiceTransformer`` is a custom implementation of a pySpark transformer. The functions that are inherited from the
48 | pySpark transformer base classes have been implemented and are thus available for use.
49 |
50 |
51 | FlatColumnTransformer Class
52 | ============================
53 |
54 | **FlatColumnTransformer** (*inputCol=None*)
55 |
56 | **Base**:
57 | - ``pyspark.ml.pipeline.Transformer``
58 | - ``pyspark.ml.param.shared.HasInputCol``
59 | - ``pyspark.ml.util.DefaultParamsReadable``
60 | - ``pyspark.ml.util.DefaultParamsWritable``
61 |
62 | **Parameters**:
63 | - **inputCol**: The name of the column used as input data. ``required``
64 |
65 | **Return**:
66 | ``FlatColumnTransformer`` class instance
67 |
68 | **Return Type**:
69 | ``pyspark.ml.pipeline.Transformer``
70 |
71 | -----------------------------
72 |
73 | **- transform** (*dataframe*)
74 |
75 | **Parameters**:
76 | - **dataframe**: the pySpark dataframe to be transformed
77 |
78 | **Return**:
79 | A pySpark dataframe containing the flattened data from the input column
80 |
81 | **Return Type**:
82 | ``pyspark.sql.DataFrame``
83 |
84 |
85 | .. note::
86 |
87 | The ``FlatColumnTransformer`` is a custom implementation of a pySpark transformer. The functions that are inherited from the
88 | pySpark transformer base classes have been implemented and are thus available for use. This transformer
89 | flattens the nested input column into multiple regular data columns.
90 |
91 |
92 | JSONTransformer Class
93 | =====================
94 |
95 | **JSONTransformer** (*inputCol=None, outputCol=None, removeInputCol=False, parser=None*)
96 |
97 | **Base**:
98 | - ``pyspark.ml.pipeline.Transformer``
99 | - ``pyspark.ml.param.shared.HasInputCol``
100 | - ``pyspark.ml.param.shared.HasOutputCol``
101 | - ``pyspark.ml.util.DefaultParamsReadable``
102 | - ``pyspark.ml.util.DefaultParamsWritable``
103 |
104 | **Parameters**:
105 | - **inputCol**: The name of the column used as input data. ``required``
106 | - **outputCol**: The name of the column that receives the transformed data. ``required``
107 | - **removeInputCol**: Whether to remove the input column from the output dataframe. The default value is ``False``.
108 | - **parser**: The object that parses JSON data into data column(s). ``required``
109 |
110 | **Return**:
111 | ``JSONTransformer`` class instance
112 |
113 | **Return Type**:
114 | ``pyspark.ml.pipeline.Transformer``
115 |
116 | -----------------------------
117 |
118 | **- transform** (*dataframe*)
119 |
120 | **Parameters**:
121 | - **dataframe**: the pySpark dataframe to be transformed
122 |
123 | **Return**:
124 | A pySpark dataframe containing the transformation result
125 |
126 | **Return Type**:
127 | ``pyspark.sql.DataFrame``
128 |
129 |
130 | .. note::
131 |
132 | The ``JSONTransformer`` is a custom implementation of a pySpark transformer. The functions that are inherited from the
133 | pySpark transformer base classes have been implemented and are thus available for use.
134 |
135 |
136 |
137 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = 'watson tranfromer'
21 | copyright = '2020, Kai Niu'
22 | author = 'Kai Niu'
23 |
24 | import sphinx_rtd_theme
25 |
26 |
27 | # -- General configuration ---------------------------------------------------
28 |
29 | # Add any Sphinx extension module names here, as strings. They can be
30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
31 | # ones.
32 | extensions = ["sphinx_rtd_theme",
33 | ]
34 |
35 | # Add any paths that contain templates here, relative to this directory.
36 | templates_path = ['_templates']
37 |
38 | # List of patterns, relative to source directory, that match files and
39 | # directories to ignore when looking for source files.
40 | # This pattern also affects html_static_path and html_extra_path.
41 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
42 |
43 |
44 | # -- Options for HTML output -------------------------------------------------
45 |
46 | # The theme to use for HTML and HTML Help pages. See the documentation for
47 | # a list of builtin themes.
48 | #
49 | html_theme = "sphinx_rtd_theme"
50 | html_logo = 'misc/watson_transformer_logo.svg'
51 | html_theme_options = {
52 | 'canonical_url': '',
53 | 'logo_only': True,
54 | 'display_version': True,
55 | 'prev_next_buttons_location': 'bottom',
56 | 'style_external_links': False,
57 | 'vcs_pageview_mode': '',
58 | 'style_nav_header_background': '#2980b9',
59 | # Toc options
60 | 'collapse_navigation': True,
61 | 'sticky_navigation': True,
62 | 'navigation_depth': 4,
63 | 'includehidden': True,
64 | 'titles_only': False
65 | }
66 |
67 | # Add any paths that contain custom static files (such as style sheets) here,
68 | # relative to this directory. They are copied after the builtin static files,
69 | # so a file named "default.css" will overwrite the builtin "default.css".
70 | html_static_path = ['_static']
71 |
72 | # master document serve as landing page
73 | master_doc = 'index'
--------------------------------------------------------------------------------
/docs/development.rst:
--------------------------------------------------------------------------------
1 | Development
2 | =============
3 |
4 | To set up a development environment and extend Watson Transformer to support other API services ....
5 |
6 |
7 | Install Package in Dev Mode
8 | ---------------------------
9 |
10 | ::
11 |
12 | pip install watson-transformer
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. watson transformer documentation master file, created by
2 | sphinx-quickstart on Mon Jul 27 22:03:12 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to watson transformer's documentation!
7 | ==============================================
8 |
9 | .. toctree::
10 | :maxdepth: 3
11 | :caption: Contents:
12 |
13 | intro
14 | tutorial
15 | api
16 | development
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/docs/intro.rst:
--------------------------------------------------------------------------------
1 | What is Watson Transformer?
2 | ===========================
3 | Watson Transformer solves the problem of consuming
4 | IBM Watson API services (STT, NLU, etc.) at scale by
5 | wrapping the service calls in a Spark transformer.
6 | In this way, IBM services such as STT and NLU can
7 | be built into a Spark ML pipeline, along with
8 | other transformers and estimators, to tackle big
9 | data challenges.
10 |
11 | The Design
12 | ----------
13 | As the UML chart illustrates, the Watson Transformer
14 | class serves as a thin wrapper around the IBM Watson
15 | API class. For extensibility purposes, the logic of
16 | consuming the API service is defined in the Watson Service
17 | class, which is an executable class. It enables any
18 | applicable API service to be wrapped into the transformer.
19 | On the other hand, the transformer handles mapping input
20 | data to API calls and parsing the service response into
21 | data fields.
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

REM Allow the caller to override the builder through the SPHINXBUILD variable.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM Probe for sphinx-build BEFORE dispatching to the help target, so that a
REM bare "make" on a machine without Sphinx also gets the explanatory message
REM below instead of failing inside the help invocation.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM No target given: show the list of available targets.
if "%1" == "" goto help

REM Forward the requested target to Sphinx "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
36 |
--------------------------------------------------------------------------------
/docs/misc/Watson_Tranformer_Design.drawio:
--------------------------------------------------------------------------------
1 | 7Vttb+I4EP41SHsr7SmJCS8fC23vVuqqVbvS7n1Chhhw69iRYwrsr7+x4xBCEpqlvFSrVEiNJ3YSP888w3gcWmgYrv6ROJp/EwFhLc8JVi103fI81+114J+2rBNLt91NDDNJA9spMzzRX8QaHWtd0IDEuY5KCKZolDdOBOdkonI2LKVY5rtNBcvfNcIzUjA8TTArWn/QQM0Ta893Mvu/hM7m6Z1dx54JcdrZGuI5DsRyy4RuWmgohVDJUbgaEqbBS3FJxt1WnN08mCRc1RngvMrn+6h/j8ad4eTOG97Gz+pL208u84rZws7YPq1apxDMVcjgyG2hQaykeCFDwYQ055Bj/uCMvQyRiqwqH9DdTBv8hYiQKLmGLnZAp2eRsq7Ssc1lhrvbtbb5NubIGrHlera5dAYHHFhEfgcd9HHR8VAJPO2zwoN6BTRIAOqxTSHVXMwEx+wmsw6kWPCA6MtqYLI+d0JEFsdnotTahgK8UAJMWyiTFVU/t47/05f627et65W9smms0waH+f40HR2nnRqSkY6/MWSDTSs3+oFICrARaY3J1PV89xMK8IiFnJA9OKYRQmE5I2pfx265i0jCsKKv+Sc5vhrcohqGXusKoCaThcJj4NdzxPhZx2E4MQCMOgzmMxhLOJopg5q1pIZHohaSw7jv60gPv8UTJfTDJv3gQcfZ2EtJr5+XXrdEeX6J8vyTxSXvJEzEkeCxHvmAZUzkh+bAa5eQ4PrnJKFdIOHq4SsYtoA05g4OIbINLH7ONVZYOzolLIgLeAJCKh/v8rBywXUUnVLGdkyY0RmH5gQQ1VFqoPGmkMlc2RMhDQITgpdzqshThE1QWkLeVgjLR/m+6ucIQ06vSJh3VtV0CoR95bHCXFGsiCEuH4vuEwH9uRTtaMpFJRSdNbC5xXT0B1axMKRIzOOpkKGOTM6Q4bionnhJQ4YT+AVXaSbi1sFe6fxjMJlTFtzhtVjouYJ3TF7S1mAuJP0Fl8UZ71gqm6h4nVyPJz3SUiVJDH0eUl7cHdM3vMp1vMOxsoaJYAxHMR1vphFCjkD5QCglwuN5AnrzC84rTS07exzB3uwRJIT5zHwj2bt5O6ms65bE8l7J/bxO/naYAZUcxDvQ0ogL7reZ6Ds8shg0Wp4O45RHCwX6bqEr44OS8llVqPjd+MDIVFV6aAxRAe51Z/pctzPLowVKmwSMnTKz3JxDTCHcxA+Fk9wg8ZtIUK4Mkv4APoD3UOfRPjz4ENpu1oaP7i5hvhAwJabGqwj46ZLEqq4L7lF80THXecbf8sRdzzheROqW8L/DMqOGvYTltErgHkTx5gvAcvrdLIq+uAXeUZF3VMIxw2PCHkRMFRX6+jLpu8P9xej1vXr09k7Fbq9C3RDwy+QNXobcLOY2Sj+eK5RUe86r9FTYjdJPQW+vZiA/ldLTlUZB6bDQfaU6zdY63ySbT9ZalWg2cn+nP/Tbl5Z7WWLXyP1I9G5W9hfTe9nGhta7MstIKcJPAQA6lTgkfyXi37QbtR/fHYqVhVJ3QN6J/KHdL7D64fcqWrl9Cr+1f5cimd9RNiSQDc5vbkgkG2SX2pBIy1ANp3U49epyWiHl83DqFdfbtZOypvp3YPXPP6z8B9ZqL/gzyn9eVYFAQSbAm9rfu5KGqu3sj1L78/rNEuF09F669oeqKgKEBwaiRt2npP/i9T5U8jpLo+5j0Xvpel+a7hbULc0m/0jpLX4jcLF3l78R+KEecPEKH6rx9mYj8EPpvXiFDxXfv0oVnrx/NYqS19galZ/UDS5d2UNVld7Pn2FyOIwTB3h5NbOlsuZmzvb7i40n1PGEuhnd6Tyh6l2d0QioYaPRpyQK2Hr/dibQuMRJXKJfc413gEtAM/vtTFLuyX6BhG7+Bw==
--------------------------------------------------------------------------------
/docs/misc/Watson_Tranformer_Design.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/Watson_Tranformer_Design.jpg
--------------------------------------------------------------------------------
/docs/misc/Watson_Tranformer_Design.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 | «executable object»Return Type Factory
«executable object»... «executable object»Response Parser
«executable object»... API Response => Data Fields
API Response => Data... Instantiate Return Type Object
Instantiate Return Type O... Watson Transformer Class + inputCol: string + outputCol: string + service: Watson Service Class + transfrom(dataframe): dataframe Watson Service Class + token: string + endpoint: string + return_type: object + response_parser: object + **params: kv pairs + __call__(object): return_type Viewer does not support full SVG 1.1
--------------------------------------------------------------------------------
/docs/misc/pipleline_benchmark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/pipleline_benchmark.png
--------------------------------------------------------------------------------
/docs/misc/regular_udf_vs_vectorized_udf_.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/regular_udf_vs_vectorized_udf_.png
--------------------------------------------------------------------------------
/docs/misc/watson_transformer_perf_full_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/watson_transformer_perf_full_pipeline.png
--------------------------------------------------------------------------------
/docs/misc/watson_transformer_stt_perf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/watson_transformer_stt_perf.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ibm-watson ~= 4.4.0
2 | botocore ~= 1.16.11
3 | ibm-cos-sdk ~= 2.7.0
4 | ibm-cos-sdk-core ~= 2.7.0
5 | ibm-cos-sdk-s3transfer ~= 2.7.0
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | "
4 | " publish package to the PyPI index
5 | " docu: https://realpython.com/pypi-publish-python-package/
6 | " docu: https://packaging.python.org/tutorials/packaging-projects/
7 | "
8 | """
9 | from setuptools import setup, Command, find_packages
10 | import os
11 |
# Load the README text into ``long_description``. Decode explicitly as UTF-8
# so the read does not depend on the platform's default encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()
14 |
15 | """
16 | "
17 | " clean up build files. e.g. python setup.py clean
18 | " docu: https://stackoverflow.com/questions/3779915/why-does-python-setup-py-sdist-create-unwanted-project-egg-info-in-project-r
19 | "
20 | """
class CleanCommand(Command):
    """Custom ``clean`` command that removes build artefacts from the project root."""

    # This command accepts no command-line options.
    user_options = []

    def initialize_options(self):
        """Nothing to initialise: the command has no options."""

    def finalize_options(self):
        """Nothing to validate: the command has no options."""

    def run(self):
        """Remove build/dist folders, egg-info metadata, *.pyc and *.tgz files via ``rm``."""
        targets = (
            './build',
            './dist',
            './src/*.egg-info',
            './*.pyc',
            './*.tgz',
            './*.egg-info',
        )
        # Globs are left unexpanded on purpose: the shell started by os.system expands them.
        os.system('rm -vrf ' + ' '.join(targets))
30 |
# Package metadata handed to setuptools.
setup(
    name='watson-transformer',
    version='0.0.17',
    license='BSD 2-Clause License',
    author='Kai Niu',
    author_email='kai.niu@ibm.com',
    description='wrap Watson API into pyspark transformers',
    # Use the README text loaded at the top of this file (Markdown, matching
    # long_description_content_type) instead of repeating the one-line summary.
    long_description=long_description,
    long_description_content_type="text/markdown",
    url=" ",
    packages=find_packages(where='./src'),
    package_dir={
        '': 'src',
    },
    keywords=[
        'pyspark', 'data science', 'pipeline',
    ],
    zip_safe=True,
    classifiers=[
        # NOTE(review): this classifier says MIT while ``license`` above says
        # BSD 2-Clause -- confirm which licence applies and align the two.
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy',
        'Topic :: Utilities',
    ],
    python_requires='>=3.4',
    install_requires=[
        # eg: 'aspectlib==1.1.1', 'six>=1.7',
        'ibm-watson == 5.2.0',
    ],
    extras_require={
        # No development-only dependencies yet (was a list holding one empty string).
        'dev': [],
        'test': ['pytest', 'pytest-cov', 'mock'],
    },
    cmdclass={
        'clean': CleanCommand,
    },
)
76 |
77 |
78 | """
79 | To build package:
80 | 1. move to project root directory
81 | 2. python3 setup.py sdist bdist_wheel
82 | 3. check dist/ folder
83 | 4. python3 -m pip install --user --upgrade twine (optional)
84 | 5. python3 -m twine upload dist/*
85 | 6. python3 setup.py clean
86 | """
87 |
--------------------------------------------------------------------------------
/src/watson_transformer/__init__.py:
--------------------------------------------------------------------------------
1 | from watson_transformer.watson_service_transformer import WatsonServiceTransformer
2 | from watson_transformer.flat_column_transformer import FlatColumnTransformer
3 | from watson_transformer.json_transformer import JSONTransformer
--------------------------------------------------------------------------------
/src/watson_transformer/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/src/watson_transformer/contrib/__init__.py
--------------------------------------------------------------------------------
/src/watson_transformer/contrib/nlu/__init__.py:
--------------------------------------------------------------------------------
1 | from watson_transformer.contrib.nlu.default_nlu_parser import DefaultNLUParser
--------------------------------------------------------------------------------
/src/watson_transformer/contrib/nlu/default_nlu_parser.py:
--------------------------------------------------------------------------------
1 | import json
2 | import numbers
3 | from watson_transformer.contrib.response_base import ResponseBase
4 | from pyspark.sql.types import StringType, FloatType, StructType, StructField, Row
5 | from pyspark.sql import functions as F
6 |
7 |
8 | """
9 | "
10 | " default NLU output parser which extracts keywords, concepts, sentiment and emotion scores
11 | "
12 | """
13 |
class DefaultNLUParser(ResponseBase):
    """
    Default NLU response parser.

    Turns the JSON text of an NLU response into a flat, fixed-width Row holding
    up to ``keywords_limit`` keywords, up to ``concepts_limit`` concepts, the
    document sentiment and the five document emotion scores. Anything missing
    from the response is reported as None, so every Row has the same fields.
    """

    # Emotion names in the order their "<name>_score" fields are emitted;
    # get_return_type() declares the struct fields in the same order.
    _EMOTIONS = ("sadness", "joy", "fear", "disgust", "anger")

    def __init__(self, keywords_limit, concepts_limit):
        """
        @param::keywords_limit: the max number of keywords extracted (positive whole number)
        @param::concepts_limit: the max number of concepts extracted (positive whole number)
        @return: none
        @raise ValueError: when a limit is not numeric, not positive or not a whole number
        """
        super(DefaultNLUParser, self).__init__()
        if not isinstance(keywords_limit, numbers.Number):
            raise ValueError('> DefaultNLUParser: keywords_limit must be numeric.')
        if not isinstance(concepts_limit, numbers.Number):
            raise ValueError('> DefaultNLUParser: concepts_limit must be numeric.')
        if keywords_limit <= 0:
            raise ValueError('> DefaultNLUParser: keywords_limit must be greater than 0.')
        if concepts_limit <= 0:
            raise ValueError('> DefaultNLUParser: concepts_limit must be greater than 0.')
        # The limits are fed to range(), which rejects floats: normalise whole
        # values such as 3.0 to int and refuse fractional ones up front.
        self.keywords_limit = self._as_whole_number('keywords_limit', keywords_limit)
        self.concepts_limit = self._as_whole_number('concepts_limit', concepts_limit)

    @staticmethod
    def _as_whole_number(name, value):
        """Return ``value`` as an int, raising ValueError when it is not a whole number."""
        try:
            whole = int(value)
        except (TypeError, ValueError, OverflowError):
            whole = None
        if whole is None or whole != value:
            raise ValueError('> DefaultNLUParser: %s must be a whole number.' % name)
        return whole

    @staticmethod
    def _to_float(value):
        """Return a real number as float; anything else becomes None."""
        # NOTE(review): the score fields are declared as FloatType, and Spark
        # Python UDFs are understood not to coerce a Python int (e.g. a JSON
        # score of 0) into a float column -- confirm against the PySpark udf docs.
        if isinstance(value, numbers.Real):
            return float(value)
        return None

    @staticmethod
    def _document(section):
        """Return the ``document`` dict of a sentiment/emotion section, or {} when absent."""
        document = section.get('document') if isinstance(section, dict) else None
        return document if isinstance(document, dict) else {}

    def _empty_record(self):
        """Build the all-None record; its key order matches get_return_type()."""
        data = {}
        for i in range(self.keywords_limit):
            data['keyword_%d' % i] = None
            data['keyword_%d_score' % i] = None
        for i in range(self.concepts_limit):
            data['concept_%d' % i] = None
            data['concept_%d_score' % i] = None
        data['sentiment_score'] = None
        data['sentiment_label'] = None
        for emotion in self._EMOTIONS:
            data['%s_score' % emotion] = None
        return data

    @classmethod
    def _fill_ranked(cls, data, prefix, entries, limit):
        """Copy text/relevance of the first ``limit`` entries into ``data`` in place."""
        if not isinstance(entries, list):
            return
        for i, entry in enumerate(entries[:limit]):
            if isinstance(entry, dict):
                data['%s_%d' % (prefix, i)] = entry.get('text')
                data['%s_%d_score' % (prefix, i)] = cls._to_float(entry.get('relevance'))

    def __call__(self, json_dumps):
        """
        @param::json_dumps: the NLU response serialized as JSON text
        @return: a Row with keyword, concept, sentiment and emotion fields;
                 every field is None when the input is not valid JSON or is
                 not a JSON object
        """
        # Start from the all-None record and overwrite what the response
        # provides. Assigning to an existing key keeps its position, so the
        # field order of the Row stays aligned with get_return_type().
        data = self._empty_record()
        try:
            json_data = json.loads(json_dumps)
        except (TypeError, ValueError):
            # TypeError: input is None / not text; ValueError: malformed JSON.
            return Row(**data)
        if not isinstance(json_data, dict):
            return Row(**data)

        self._fill_ranked(data, 'keyword', json_data.get('keywords'), self.keywords_limit)
        self._fill_ranked(data, 'concept', json_data.get('concepts'), self.concepts_limit)

        sentiment = self._document(json_data.get('sentiment'))
        data['sentiment_score'] = self._to_float(sentiment.get('score'))
        data['sentiment_label'] = sentiment.get('label')

        emotions = self._document(json_data.get('emotion')).get('emotion')
        if isinstance(emotions, dict):
            for emotion in self._EMOTIONS:
                data['%s_score' % emotion] = self._to_float(emotions.get(emotion))

        return Row(**data)

    def get_return_type(self):
        """
        @return: the StructType describing the Row produced by __call__
        """
        fields = []
        # keyword text/score pairs
        for i in range(self.keywords_limit):
            fields.append(StructField("keyword_%d" % i, StringType(), True))
            fields.append(StructField("keyword_%d_score" % i, FloatType(), True))

        # concept text/score pairs
        for i in range(self.concepts_limit):
            fields.append(StructField("concept_%d" % i, StringType(), True))
            fields.append(StructField("concept_%d_score" % i, FloatType(), True))

        # document sentiment followed by the emotion scores
        fields.append(StructField("sentiment_score", FloatType(), True))
        fields.append(StructField("sentiment_label", StringType(), True))
        for emotion in self._EMOTIONS:
            fields.append(StructField("%s_score" % emotion, FloatType(), True))

        return StructType(fields)
159 |
--------------------------------------------------------------------------------
/src/watson_transformer/contrib/readers.py:
--------------------------------------------------------------------------------
1 | import types
2 | from botocore.client import Config
3 | import ibm_boto3
def __iter__(self):
    """Placeholder ``__iter__`` that ibm_cos_reader binds onto streams lacking one; returns 0."""
    return 0
5 |
6 | """
7 | "
8 | " IBM COS reader
9 | " boto3 is not thread safe(https://github.com/boto/botocore/issues/1246)
10 | " create new session in each thread(ref: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html)
11 | "
12 | """
def ibm_cos_reader(audio_file, bucket, token, endpoint,
                   auth_endpoint="https://iam.ng.bluemix.net/oidc/token"):
    """
    @param::audio_file: the audio file uid (object key)
    @param::bucket: the bucket name in which the audio file is stored
    @param::token: the API access token for IBM COS service
    @param::endpoint: the URL to access IBM COS service
    @param::auth_endpoint: the IAM token endpoint used to authenticate; defaults
                           to the endpoint this reader has always used
    @return: the audio stream (the 'Body' of the fetched object)
    """
    # A fresh session is created on every call rather than shared between
    # threads (see the thread-safety references in the comment block above).
    session = ibm_boto3.session.Session()
    cos_client = session.client(service_name='s3',
                                ibm_api_key_id=token,
                                ibm_auth_endpoint=auth_endpoint,
                                config=Config(signature_version='oauth'),
                                endpoint_url=endpoint)
    audio_stream = cos_client.get_object(Bucket=bucket, Key=audio_file)['Body']
    # Bind the module-level placeholder so that hasattr(stream, "__iter__")
    # checks succeed on stream objects that do not define it.
    # NOTE(review): presumably needed for the consumer of this stream to accept
    # it as an upload body -- confirm against the caller.
    if not hasattr(audio_stream, "__iter__"):
        audio_stream.__iter__ = types.MethodType(__iter__, audio_stream)
    return audio_stream
30 |
31 |
32 | """
33 | "
34 | " Local file reader
35 | "
36 | """
def local_fs_reader(audio_file):
    """
    @param::audio_file: the full path including filename to the audio file
    @return: the audio file opened for binary reading; closing it is left to the caller
    """
    stream = open(audio_file, mode="rb")
    return stream
--------------------------------------------------------------------------------
/src/watson_transformer/contrib/response_base.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " response base class
4 | "
5 | """
class ResponseBase():
    """Interface for response parsers: subclasses override both methods below."""

    def __call__(self, json_data):
        """
        @param::json_data: the service response to parse
        @raise NotImplementedError: always, until overridden by a subclass
        """
        message = '> ResponseBase class: __call__ method is not implemented.'
        raise NotImplementedError(message)

    def get_return_type(self):
        """
        @raise NotImplementedError: always, until overridden by a subclass
        """
        message = '> ResponseBase class: get_return_type method is not implemented.'
        raise NotImplementedError(message)
13 |
--------------------------------------------------------------------------------
/src/watson_transformer/contrib/stt/__init__.py:
--------------------------------------------------------------------------------
1 | from watson_transformer.contrib.stt.default_stt_parser import DefaultSTTParser
--------------------------------------------------------------------------------
/src/watson_transformer/contrib/stt/default_stt_parser.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 | from watson_transformer.contrib.response_base import ResponseBase
4 | from pyspark.sql.types import StringType, FloatType, StructType, StructField, Row
5 | from pyspark.sql import functions as F
6 |
7 | """
8 | "
9 | " default STT output interpreter which assume one alternative transcript and no speaker detection
10 | "
11 | """
12 |
class DefaultSTTParser(ResponseBase):
    """
    Default STT response parser.

    Takes the first alternative transcript of every result in the response and
    joins them into one period-separated string.
    """

    def __init__(self):
        super(DefaultSTTParser, self).__init__()

    def __call__(self, json_response):
        """
        @param::json_response: the STT response serialized as JSON text
        @return: the transcripts joined by '. ' with a trailing period, or None
                 when the input is empty, is not valid JSON, has no 'results'
                 attribute, has a malformed result entry, or has no results
        """
        if not json_response:
            return None
        try:
            response = json.loads(json_response)
            # A response without 'results' raises here and yields None below;
            # the original note suggests this happens when the recording
            # starts with more than 30s of silence.
            results = response['results']
            transcripts = [doc['alternatives'][0]['transcript'].strip() for doc in results]
        except (TypeError, ValueError, KeyError, IndexError, AttributeError):
            # Only the errors raised by bad JSON or an unexpected response
            # shape are swallowed; anything else propagates.
            return None
        if not transcripts:
            # No results: report "no transcript" rather than a lone ".".
            return None
        return '. '.join(transcripts) + '.'

    def get_return_type(self):
        """
        @return: the defined return type
        """
        return StringType()
--------------------------------------------------------------------------------
/src/watson_transformer/flat_column_transformer.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " explode the struct column to multiple simple columns
4 | " @ref: https://stackoverflow.com/questions/47669895/how-to-add-multiple-columns-using-udf?rq=1
5 | "
6 | """
7 |
8 | from pyspark import keyword_only
9 | from pyspark.sql import functions as F
10 | from pyspark.sql.types import StringType, FloatType, StructType, StructField, Row
11 | from pyspark.sql import DataFrame
12 | from pyspark.ml.pipeline import Transformer
13 | from pyspark.ml.param.shared import HasInputCol, HasOutputCol, Param
14 | from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable
15 |
16 |
17 |
class FlatColumnTransformer(Transformer,
                            HasInputCol,
                            DefaultParamsReadable,
                            DefaultParamsWritable):
    """Transformer that replaces one struct column by the struct's own fields."""

    @keyword_only
    def __init__(self, inputCol=None):
        """
        @param::inputCol: the name of the struct column to flatten
        @return: none
        """
        super(FlatColumnTransformer, self).__init__()
        # keyword_only stores the keyword arguments actually passed in _input_kwargs
        self._set(**self._input_kwargs)

    @keyword_only
    def setParams(self, inputCol=None):
        """
        @param::inputCol: the name of the struct column to flatten
        @return: the result of self._set()
        """
        return self._set(**self._input_kwargs)

    def setInputCol(self, value):
        """
        @param::value: the name of the struct column to flatten
        @return: the result of self._set()
        """
        return self._set(inputCol=value)

    def _transform(self, df: DataFrame) -> DataFrame:
        """
        @param::df: the pyspark dataframe
        @return: the dataframe with the input column replaced by its fields
        @raise ValueError: when the input column is not in the dataframe
        """
        names = df.columns
        struct_col = self.getInputCol()
        if struct_col not in names:
            raise ValueError("> FlatColumnTransformer class: inputCol is not in the dataframe.")

        # keep every column except the first occurrence of the input column
        position = names.index(struct_col)
        kept = names[:position] + names[position + 1:]

        # wrap the struct in a one-element array, explode it into a helper
        # column, then select the helper's fields next to the kept columns
        wrapped = F.array(F.col(struct_col))
        exploded = df.withColumn('__explode_col_output__', F.explode(wrapped))
        return exploded.select(*kept, "__explode_col_output__.*")
--------------------------------------------------------------------------------
/src/watson_transformer/json_transformer.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " pyspark transformer that converts JSON data to data column(s)
4 | " @ref: https://stackoverflow.com/questions/41399399/serialize-a-custom-transformer-using-python-to-be-used-within-a-pyspark-ml-pipel/52467470#52467470
5 | " @ref: https://stackoverflow.com/questions/32331848/create-a-custom-transformer-in-pyspark-ml
6 | "
7 | """
8 | import pandas as pd
9 | from concurrent.futures import ThreadPoolExecutor
10 | from pyspark import keyword_only
11 | from pyspark.sql import functions as F
12 | from pyspark.sql.types import StringType, FloatType, StructType, StructField, Row
13 | from pyspark.sql import DataFrame
14 | from pyspark.ml.pipeline import Transformer
15 | from pyspark.ml.param.shared import HasInputCol, HasOutputCol, Param
16 | from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable
17 |
18 |
class JSONTransformer(Transformer,
                      HasInputCol,
                      HasOutputCol,
                      DefaultParamsReadable,
                      DefaultParamsWritable):
    """
    Transformer that applies a parser object to a JSON column through a UDF
    and stores the parsed result in the output column.
    """

    @keyword_only
    def __init__(self,
                 inputCol=None,
                 outputCol=None,
                 removeInputCol=False,
                 parser=None):
        """
        @param::inputCol: the input column name containing the JSON data
        @param::outputCol: the output column name
        @param::removeInputCol: flag indicating whether to drop the input column
        @param::parser: callable object parsing JSON data to data column(s);
                        _transform() also calls its get_return_type()
        @return: none
        @raise ValueError: when parser is not callable or a column name is blank
        """
        super(JSONTransformer, self).__init__()
        self.parser = Param(self, "parser",
                            "callable object parsing JSON data to data column(s)")
        self._setDefault(parser=None)
        self.removeInputCol = Param(self, "removeInputCol",
                                    "whether to drop the input column after parsing")
        self._setDefault(removeInputCol=False)
        kwargs = self._input_kwargs
        self._set(**kwargs)

        # callable(None) is False, so this check also covers a missing parser.
        if not callable(parser):
            raise ValueError('> The parser instance provided must be callable object.')
        if not inputCol or not inputCol.strip():
            raise ValueError('> The input column name is required.')
        if not outputCol or not outputCol.strip():
            raise ValueError('> The output column name is required.')

    @keyword_only
    def setParams(self,
                  inputCol=None,
                  outputCol=None,
                  removeInputCol=False,
                  parser=None):
        """
        @param::inputCol: the input column name containing the JSON data
        @param::outputCol: the output column name
        @param::removeInputCol: flag indicating whether to drop the input column
        @param::parser: callable object parsing JSON data to data column(s)
        @return: the result of self._set()
        """
        # Only the keyword arguments actually passed are applied.
        kwargs = self._input_kwargs
        return self._set(**kwargs)

    def setRemoveInputCol(self, value):
        """
        @param::value: boolean flag indicating whether to drop the input column
        @return: the result of self._set()
        """
        return self._set(removeInputCol=value)

    def getRemoveInputCol(self):
        """
        @param:: None
        @return: the flag indicating whether the input column is dropped
        """
        return self.getOrDefault(self.removeInputCol)

    def setParser(self, value):
        """
        @param::value: the parser object
        @return: the result of self._set()
        """
        return self._set(parser=value)

    def getParser(self):
        """
        @param:: None
        @return: the configured parser object
        """
        return self.getOrDefault(self.parser)

    def setInputCol(self, value):
        """
        @param::value: the input column name containing the JSON data
        @return: the result of self._set()
        """
        return self._set(inputCol=value)

    def setOutputCol(self, value):
        """
        @param::value: the output column name receiving the parsed result
        @return: the result of self._set()
        """
        return self._set(outputCol=value)

    def _transform(self, df: DataFrame) -> DataFrame:
        """
        @param::df: the pyspark dataframe
        @return: the dataframe with the output column added and, when the
                 removeInputCol flag is set, the input column dropped
        """
        parser = self.getParser()
        outputCol = self.getOutputCol()
        inputCol = self.getInputCol()
        removeInputCol = self.getRemoveInputCol()
        return_type = parser.get_return_type()
        # CPU bounded task, not going to benefit from vectorized UDF very much
        parser_udf = F.udf(parser, return_type).asNondeterministic()
        df = df.withColumn(outputCol, parser_udf(F.col(inputCol)))
        # drop input column based on flag
        if removeInputCol:
            df = df.drop(inputCol)
        return df
--------------------------------------------------------------------------------
/src/watson_transformer/service/__init__.py:
--------------------------------------------------------------------------------
1 | from watson_transformer.service.nlu import NLU
2 | from watson_transformer.service.stt import STT
--------------------------------------------------------------------------------
/src/watson_transformer/service/nlu.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " IBM NLU Service Executable Class
4 | " docu: https://cloud.ibm.com/apidocs/natural-language-understanding?code=python#analyze-text
5 | "
6 | """
7 |
8 | import json
9 | from pyspark import keyword_only
10 | from ibm_watson import NaturalLanguageUnderstandingV1
11 | from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
12 | from ibm_cloud_sdk_core import ApiException
13 | from ibm_watson.natural_language_understanding_v1 import Features, KeywordsOptions, ConceptsOptions, SentimentOptions, EmotionOptions
14 | from pyspark.sql.types import StringType
15 | from watson_transformer.service.service_base import ServiceBase
16 |
class NLU(ServiceBase):
    """Executable service class that sends text to the IBM NLU API."""

    @keyword_only
    def __init__(self, token, endpoint, strict_mode=True, **params):
        """
        @param::token: the IBM NLU API access token
        @param::endpoint: the endpoint url for the NLU API
        @param::strict_mode: when True, unexpected errors during a call are
                             re-raised as RuntimeError instead of yielding None
        @param::params: the kv params forwarded to the analyze() call
        @return: none
        """
        super(NLU, self).__init__(strict_mode)
        self.token = token
        self.endpoint = endpoint
        self.params = params

    def __call__(self, text):
        """
        @param::text: the text to perform NLU
        @return: the API result serialized as a JSON string, or None when the
                 text is empty, the call failed or the result is empty
        @raise RuntimeError: on a non-API error while strict_mode is enabled
        """
        if not text:
            return None

        # Function-scope import: logging is only needed on the failure paths.
        import logging
        logger = logging.getLogger(__name__)

        # init nlu client
        authenticator = IAMAuthenticator(self.token)
        nlu = NaturalLanguageUnderstandingV1(version='2019-07-12', authenticator=authenticator)
        nlu.set_service_url(self.endpoint)

        try:
            response = nlu.analyze(text=text, **self.params).get_result()
        except ApiException as api_ex:
            # Still reported as "no result", but no longer silently.
            logger.warning("NLU API call failed: %s", api_ex)
            response = None
        except Exception as ex:
            if self.strict_mode:
                # Chain the original error so its traceback is kept as the cause.
                raise RuntimeError("*** runtime error caused by input: '%s'" % (text)) from ex
            logger.warning("NLU call failed (strict_mode disabled): %s", ex)
            response = None

        return json.dumps(response) if response else None

    def get_return_type(self):
        """
        @return: the Spark type of the value returned by __call__
        """
        return StringType()

    def get_new_client(self):
        """
        @return: a new NLU instance built from this instance's configuration
        """
        return NLU(token=self.token,
                   endpoint=self.endpoint,
                   strict_mode=self.strict_mode,
                   **self.params)
--------------------------------------------------------------------------------
/src/watson_transformer/service/service_base.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " define the contract/interface
4 | "
5 | """
6 |
class ServiceBase():
    """Contract for executable service classes: subclasses override the three methods below."""

    def __init__(self, strict_mode=True):
        """
        @param::strict_mode: flag stored on the instance for subclasses to consult
        @return: none
        """
        self.strict_mode = strict_mode

    def __call__(self, data):
        """
        @param::data: the input handed to the service
        @raise NotImplementedError: always, until overridden by a subclass
        """
        raise NotImplementedError('> service class __call__ method is not implemented.')

    def get_return_type(self, data=None):
        """
        @param::data: unused; optional so the method can be called without
                      arguments, the way the subclasses define it
        @raise NotImplementedError: always, until overridden by a subclass
        """
        raise NotImplementedError('> service class get_return_type method is not implemented.')

    def get_new_client(self):
        """
        @raise NotImplementedError: always, until overridden by a subclass
        """
        raise NotImplementedError('> service class get_new_client method is not implemented.')
19 |
20 |
--------------------------------------------------------------------------------
/src/watson_transformer/service/stt.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " IBM STT Service Executable Class
4 | " docu: https://cloud.ibm.com/apidocs/speech-to-text
5 | "
6 | """
7 |
8 | import json
9 | from pyspark import keyword_only
10 | from ibm_watson import SpeechToTextV1
11 | from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
12 | from ibm_cloud_sdk_core.api_exception import ApiException
13 | from pyspark.sql.types import StringType
14 | from watson_transformer.service.service_base import ServiceBase
15 |
class STT(ServiceBase):
    """
    Callable wrapper around the IBM Speech to Text API.

    An instance is called with an audio file name/id, loads the audio through
    the configured reader, sends it to the service and returns the response
    serialized as a JSON string.
    """

    @keyword_only
    def __init__(self, token, endpoint, reader, strict_mode=True, **params):
        """
        @param::token: the IBM STT API access token
        @param::endpoint: the endpoint url for the STT API
        @param::reader: callable that takes the audio file name/id and returns the audio stream
        @param::strict_mode: when true, a non-API exception raised while calling the service
                             is re-raised as RuntimeError instead of being returned as an error payload
        @param::params: the kv params forwarded to SpeechToTextV1.recognize on every call
        @return: none
        """
        super(STT, self).__init__(strict_mode)
        self.token = token
        self.endpoint = endpoint
        self.reader = reader
        self.params = params

    def __call__(self, audio_file):
        """
        @param::audio_file: the audio filename/id for reader to retrieve the audio stream
        @return: the JSON-serialized service response (or error payload); None when
                 audio_file is empty, the reader yields no stream, or the response is empty
        @raise: RuntimeError in strict mode when the request fails with a non-API exception;
                exceptions raised by the reader itself are not caught here
        """
        # guard clauses: nothing to do without a file name or without audio data
        if not audio_file:
            return None
        audio_stream = self.reader(audio_file)
        if not audio_stream:
            return None

        # init stt client
        authenticator = IAMAuthenticator(self.token)
        stt = SpeechToTextV1(authenticator=authenticator)
        stt.set_service_url(self.endpoint)

        # send the request
        try:
            response = stt.recognize(audio=audio_stream, **self.params).get_result()
        except ApiException as api_ex:
            # less likely recoverable if it is STT API error, so it is reported in-band
            response = {'api_error_message': str(api_ex)}
        except Exception as ex:
            if self.strict_mode:
                # maybe recoverable by retry; chain the cause so the original failure stays visible
                raise RuntimeError("*** runtime error caused by input: '%s'" % (audio_file,)) from ex
            response = {'error_message': str(ex)}
        return json.dumps(response) if response else None

    def get_return_type(self):
        """
        @param::None
        @return: a StringType instance, matching the JSON string returned by __call__
        """
        return StringType()

    def get_new_client(self):
        """
        @param::None
        @return: a separate STT instance sharing this instance's token, endpoint,
                 reader, strict_mode and extra keyword params
        """
        return STT(token = self.token,
                   endpoint = self.endpoint,
                   reader = self.reader,
                   strict_mode = self.strict_mode,
                   **self.params)
--------------------------------------------------------------------------------
/src/watson_transformer/watson_service_transformer.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " pyspark transformer that consumes IBM Watson services
4 | " @ref: https://stackoverflow.com/questions/41399399/serialize-a-custom-transformer-using-python-to-be-used-within-a-pyspark-ml-pipel/52467470#52467470
5 | " @ref: https://stackoverflow.com/questions/32331848/create-a-custom-transformer-in-pyspark-ml
6 | "
7 | """
8 | import os
9 | import numbers
10 | import pandas as pd
11 | from concurrent.futures import ThreadPoolExecutor
12 | from pyspark import keyword_only
13 | from pyspark.sql import functions as F
14 | from pyspark.sql.types import StringType
15 | from pyspark.sql import DataFrame
16 | from pyspark.ml.pipeline import Transformer
17 | from pyspark.ml.param.shared import HasInputCol, HasOutputCol, Param
18 | from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable
19 |
20 |
class WatsonServiceTransformer(Transformer,
                               HasInputCol,
                               HasOutputCol,
                               DefaultParamsReadable,
                               DefaultParamsWritable):
    """
    PySpark ML transformer that feeds each value of the input column to an
    IBM Watson service object and stores what the service returns in the
    output column.
    """

    @keyword_only
    def __init__(self,
                 inputCol=None,
                 outputCol=None,
                 vectorization=False,
                 max_workers=5,
                 service=None):
        """
        @param::inputCol: the input column name holding the values handed to the service
        @param::outputCol: the output column name
        @param::vectorization: flag indicating whether to use the vectorized (pandas) udf
        @param::max_workers: the max number of worker threads used per batch by the vectorized udf
        @param::service: the IBM service object; must be callable
        @return: none
        @raise: ValueError when service is not callable, max_workers is not a
                positive number, or a column name is missing/blank
        """
        super(WatsonServiceTransformer, self).__init__()
        # the third Param argument is the human readable description
        self.service = Param(self, "service",
                             "callable IBM service object applied to every input value")
        self._setDefault(service=None)
        self.vectorization = Param(self, "vectorization",
                                   "whether the service is applied through a vectorized (pandas) udf")
        self._setDefault(vectorization=False)
        self.max_workers = Param(self, "max_workers",
                                 "max number of worker threads used per batch by the vectorized udf")
        self._setDefault(max_workers=5)
        kwargs = self._input_kwargs
        self._set(**kwargs)

        # validate the constructor arguments
        if not callable(service):
            raise ValueError('> The service instance provided must be callable object.')
        if not isinstance(max_workers, numbers.Number) or max_workers <= 0:
            raise ValueError('> The number of maximum workers must greater than 0.')
        if not inputCol or not inputCol.strip():
            raise ValueError('> The input column name is required.')
        if not outputCol or not outputCol.strip():
            raise ValueError('> The output column name is required.')

    @keyword_only
    def setParams(self,
                  inputCol=None,
                  outputCol=None,
                  vectorization=False,
                  max_workers=5,
                  service=None):
        """
        Set any subset of the params by keyword.

        Only the keywords actually passed by the caller end up in
        self._input_kwargs, so the defaults in the signature never overwrite
        a previously configured value.

        @param::inputCol: the input column name
        @param::outputCol: the output column name
        @param::vectorization: flag indicating whether to use the vectorized udf
        @param::max_workers: the max number of workers for each task
        @param::service: the IBM service object
        @return: the result of self._set
        """
        kwargs = self._input_kwargs
        return self._set(**kwargs)

    def setVectorization(self, value):
        """
        set whether or not enable vectorized udf

        @param::value: boolean value indicating enable vectorized udf
        @return: the result of self._set
        """
        return self._set(vectorization=value)

    def getVectorization(self):
        """
        get enable state of vectorized udf

        @param::None
        @return: vectorization flag
        """
        return self.getOrDefault(self.vectorization)

    def setMax_workers(self, value):
        """
        set the maximum number of workers in each task

        @param::value: unsigned int indicating the max number of workers
        @return: the result of self._set
        """
        return self._set(max_workers=value)

    def getMax_workers(self):
        """
        get the max number of workers in each task

        @param::None
        @return: the configured max workers
        """
        return self.getOrDefault(self.max_workers)

    def setService(self, value):
        """
        set the API service object

        @param::value: the IBM service API object
        @return: the result of self._set
        """
        return self._set(service=value)

    def getService(self):
        """
        get the API service object

        @param::None
        @return: the configured service object
        """
        return self.getOrDefault(self.service)

    def setInputCol(self, value):
        """
        set input column name

        @param::value: the input column name
        @return: the result of self._set
        """
        return self._set(inputCol=value)

    def setOutputCol(self, value):
        """
        set output column name

        @param::value: the output column name that receives the result
        @return: the result of self._set
        """
        return self._set(outputCol=value)

    def _transform(self, df:DataFrame) -> DataFrame:
        """
        perform the transform using provided IBM service api

        exploit arrow and vectorized udf
        ref: https://spark.apache.org/docs/latest/sql-pyspark-pandas-with-arrow.html

        to prevent udf from being called multiple times, use asNondeterministic()
        issue: https://github.com/apache/spark/pull/19929/files/cc309b0ce2496365afd8c602c282e3d84aeed940
        ref: https://stackoverflow.com/questions/58696198/spark-udf-executed-many-times

        @param::df: the pyspark dataframe
        @return: the transformed dataframe
        """
        service = self.getService()
        max_workers = max(self.getMax_workers(), 1)
        return_type = service.get_return_type()

        def call_service(data):
            # every value is handled by a client obtained from get_new_client()
            return service.get_new_client()(data)

        if self.getVectorization():
            # vectorized udf: the values of one batch are processed by a thread pool
            @F.pandas_udf(return_type, F.PandasUDFType.SCALAR)
            def udf(input_data):
                # NOTE(review): presumably the Arrow IPC compatibility switch
                # needed by older Spark with pyarrow >= 0.15 -- confirm
                os.environ['ARROW_PRE_0_15_IPC_FORMAT']='1'
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    # materialize while the pool is still open; map() keeps input order
                    results = list(executor.map(call_service, input_data))
                return pd.Series(results)
        else:
            # regular udf
            udf = F.udf(call_service, return_type)

        udf = udf.asNondeterministic() # prevent udf from being called multiple times
        return df.withColumn(self.getOutputCol(), udf(F.col(self.getInputCol())))
219 |
--------------------------------------------------------------------------------
/test/README.md:
--------------------------------------------------------------------------------
1 | https://docs.pytest.org/en/latest/goodpractices.html
--------------------------------------------------------------------------------
/test/contrib/stt/test_default_stt_parser.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " test default STT response type
4 | "
5 | """
6 | import json
7 | import pytest
8 | from unittest import mock
9 | from watson_transformer.contrib.stt.default_stt_parser import DefaultSTTParser
10 |
@pytest.fixture(scope='function')
def mock_input(request):
    """
    Serialized STT-like response holding two results with two alternative
    transcripts each.
    """
    def as_result(*transcripts):
        # one result entry listing its alternatives in the given order
        return {'alternatives': [{'transcript': text} for text in transcripts]}

    payload = {'results': [as_result('foo', 'bar'), as_result('joe', 'joy')]}
    return json.dumps(payload)
30 |
class TestDefaultSTTParser():
    """Unit tests for DefaultSTTParser."""

    def test_invalid_input(self):
        """Inputs that are not a serialized STT response are parsed to None."""
        # arrange
        parser = DefaultSTTParser()
        invalid_json = json.dumps({'foo':'bar'})
        for value in [None, invalid_json, 1, 'null', []]:
            # act
            data = parser(value)
            # assert (identity check; the message names the offending input)
            assert data is None, "expected None for input %r" % (value,)

    def test_valid_input(self, mock_input):
        """Only the first alternative of every result ends up in the output."""
        # arrange
        parser = DefaultSTTParser()
        # act
        data = parser(mock_input)
        # assert
        assert data == 'foo. joe.'
        assert 'bar' not in data
        assert 'joy' not in data
53 |
54 |
--------------------------------------------------------------------------------
/test/contrib/test_response_base.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " test response base class
4 | "
5 | """
6 | import pytest
7 | from unittest import mock
8 | from watson_transformer.contrib.response_base import ResponseBase
9 |
class TestResponseBase():
    """Checks that the ResponseBase methods raise NotImplementedError."""

    def test_callable_not_implemented(self):
        # arrange
        base = ResponseBase()
        # act
        with pytest.raises(NotImplementedError) as raised:
            base(None)
        # assert
        message = str(raised.value)
        assert '__call__' in message

    def test_get_return_type_implemented(self):
        # arrange
        base = ResponseBase()
        # act
        with pytest.raises(NotImplementedError) as raised:
            base.get_return_type()
        # assert
        message = str(raised.value)
        assert 'get_return_type' in message
--------------------------------------------------------------------------------
/test/service/test_nlu.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " Test NLU Service
4 | "
5 | """
6 |
7 | import json
8 | import pytest
9 | from unittest import mock
10 | from ibm_cloud_sdk_core import ApiException
11 | from watson_transformer.service.nlu import NLU
12 |
13 |
class TestNLU():
    """
    Unit tests for the NLU service wrapper.

    The IBM SDK classes are patched where the nlu module looks them up, so
    every request is answered by a mock.
    """

    # patch where the class is located.
    AUTH_TARGET = 'watson_transformer.service.nlu.IAMAuthenticator'
    API_TARGET = 'watson_transformer.service.nlu.NaturalLanguageUnderstandingV1'

    @staticmethod
    def _make_nlu(**overrides):
        """Build an NLU instance with the shared test settings plus overrides."""
        settings = {'token': 'foo',
                    'endpoint': 'http://www.foo.com/bar',
                    'features': 'foo'}
        settings.update(overrides)
        return NLU(**settings)

    def test_nlu_init(self):
        # arrange
        token = 'foo'
        endpoint = 'http://www.ibm.com'
        feature = {'foo':'bar'}
        # act
        nlu = NLU(token = token,
                  endpoint = endpoint,
                  features = feature)
        # assert
        assert nlu.token == token
        assert nlu.endpoint == endpoint
        assert 'features' in nlu.params
        assert 'foo' in nlu.params['features']
        assert nlu.params['features']['foo'] == 'bar'

    def test_service_callable_valid_input(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_nlu_api:
            # arrange: mock nlu.analyze().get_result()
            mock_nlu_api.return_value.analyze.return_value.get_result.return_value = {'value':'mock response'}
            nlu = self._make_nlu()
            # act
            response = nlu('I love this game.')
            # assert
            data = json.loads(response)
            assert 'value' in data
            assert data['value'] == 'mock response'

    def test_service_callable_invalid_input(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_nlu_api:
            # arrange: mock nlu.analyze().get_result()
            mock_nlu_api.return_value.analyze.return_value.get_result.return_value = {'value':'mock response'}
            nlu = self._make_nlu()
            for value in [None, '']:
                # act
                response = nlu(value)
                # assert
                assert response is None

    def test_service_callable_raise_none_api_exception_strict_mode_on(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_nlu_api:
            # arrange: mock nlu.analyze()
            mock_nlu_api.return_value.analyze.side_effect = Exception('NLU API raise exception.')
            nlu = self._make_nlu(strict_mode=True)
            for value in [' ', ' _', 'one two']:
                # act: strict mode turns the failure into a RuntimeError
                with pytest.raises(RuntimeError) as exinfo:
                    nlu(value)
                # assert: the offending input is reported in the message
                assert value in str(exinfo.value)
            assert nlu.strict_mode is True

    def test_service_callable_raise_none_api_exception_strict_mode_off(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_nlu_api:
            # arrange: mock nlu.analyze()
            mock_nlu_api.return_value.analyze.side_effect = Exception('NLU API raise exception.')
            nlu = self._make_nlu(strict_mode=False)
            for value in [' ', ' _', 'one two']:
                # act
                response = nlu(value)
                # assert
                assert response is None
            assert nlu.strict_mode is False

    def test_service_callable_raise_api_exception_strict_mode_on(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_nlu_api:
            # arrange: mock nlu.analyze()
            mock_nlu_api.return_value.analyze.side_effect = ApiException('NLU API raise exception.')
            nlu = self._make_nlu(strict_mode=True)
            for value in [' ', ' _', 'one two']:
                # act
                response = nlu(value)
                # assert: API errors are swallowed even in strict mode
                assert response is None
            assert nlu.strict_mode is True

    def test_service_callable_raise_api_exception_strict_mode_off(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_nlu_api:
            # arrange: mock nlu.analyze()
            mock_nlu_api.return_value.analyze.side_effect = ApiException('NLU API raise exception.')
            nlu = self._make_nlu(strict_mode=False)
            for value in [' ', ' _', 'one two']:
                # act
                response = nlu(value)
                # assert
                assert response is None
            assert nlu.strict_mode is False

    def test_get_new_client(self):
        # arrange
        nlu = self._make_nlu()
        # act
        new_nlu = nlu.get_new_client()
        # assert
        assert nlu.token == new_nlu.token
        assert nlu.endpoint == new_nlu.endpoint
        assert 'features' in new_nlu.params
        assert new_nlu.params['features'] == 'foo'
        assert new_nlu is not nlu
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
--------------------------------------------------------------------------------
/test/service/test_service_base.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " test API service base class
4 | "
5 | """
6 | import pytest
7 | from unittest import mock
8 | from watson_transformer.service.service_base import ServiceBase
9 |
@pytest.fixture(scope='function')
def mock_base_service(request):
    """Provide a plain ServiceBase instance, created anew for every test function."""
    base_service = ServiceBase()
    return base_service
14 |
class TestServiceBase():
    """Checks the ServiceBase contract: unimplemented methods and strict_mode storage."""

    def test_callable(self, mock_base_service):
        # act
        with pytest.raises(NotImplementedError) as raised:
            mock_base_service(None)
        # assert
        assert '__call__' in str(raised.value)

    def test_get_return_type(self, mock_base_service):
        # act
        with pytest.raises(NotImplementedError) as raised:
            mock_base_service.get_return_type(None)
        # assert
        assert 'get_return_type' in str(raised.value)

    def test_get_new_client(self, mock_base_service):
        # act
        with pytest.raises(NotImplementedError) as raised:
            mock_base_service.get_new_client()
        # assert
        assert 'get_new_client' in str(raised.value)

    def test_strict_mode_init(self):
        # both flag values must be stored as given
        for flag in (True, False):
            # act
            created = ServiceBase(strict_mode = flag)
            # assert
            assert created.strict_mode == flag
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/test/service/test_stt.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " Test IBM STT Service
4 | "
5 | """
6 |
7 | import json
8 | import pytest
9 | from unittest import mock
10 | from ibm_cloud_sdk_core.api_exception import ApiException
11 | from watson_transformer.service.stt import STT
12 |
13 |
class TestSTT():
    """
    Unit tests for the STT service wrapper.

    The IBM SDK classes are patched where the stt module looks them up, so
    every request is answered by a mock.
    """

    # patch where the class is located (the stt module, not the nlu one).
    AUTH_TARGET = 'watson_transformer.service.stt.IAMAuthenticator'
    API_TARGET = 'watson_transformer.service.stt.SpeechToTextV1'

    @staticmethod
    def _make_stt(**overrides):
        """Build an STT instance with the shared test settings plus overrides."""
        settings = {'token': 'foo',
                    'endpoint': 'http://www.foo.com/bar',
                    'reader': lambda x: "foo is speaking to bar.",
                    'features': 'foo'}
        settings.update(overrides)
        return STT(**settings)

    def test_stt_init(self):
        # arrange
        token = 'foo'
        endpoint = 'http://www.ibm.com'
        reader = lambda x: "foo is speaking to bar"
        feature = {'foo':'bar'}
        # act
        stt = STT(token = token,
                  endpoint = endpoint,
                  reader = reader,
                  features = feature)
        # assert
        assert stt.token == token
        assert stt.endpoint == endpoint
        assert 'features' in stt.params
        assert 'foo' in stt.params['features']
        assert stt.params['features']['foo'] == 'bar'
        assert stt.reader('foo') == reader('foo')

    def test_service_callable_valid_input(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_stt_api:
            # arrange: mock stt.recognize().get_result()
            mock_stt_api.return_value.recognize.return_value.get_result.return_value = {'value':'mock response'}
            stt = self._make_stt()
            # act
            response = stt('sample.wav')
            # assert
            data = json.loads(response)
            assert 'value' in data
            assert data['value'] == 'mock response'

    def test_service_callable_invalid_input(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_stt_api:
            # arrange: mock stt.recognize().get_result()
            mock_stt_api.return_value.recognize.return_value.get_result.return_value = {'value':'mock response'}
            stt = self._make_stt()
            for value in [None, '']:
                # act
                response = stt(value)
                # assert
                assert response is None

    def test_service_callable_raise_none_api_exception_strict_mode_on(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_stt_api:
            # arrange: mock stt.recognize()
            mock_stt_api.return_value.recognize.side_effect = Exception('STT API raise exception.')
            stt = self._make_stt(strict_mode=True)
            for value in ['none_exist.wav', 'invalid.wav']:
                # act: strict mode turns the failure into a RuntimeError
                with pytest.raises(RuntimeError) as exinfo:
                    stt(value)
                # assert: the offending input is reported in the message
                assert value in str(exinfo.value)
            assert stt.strict_mode is True

    def test_service_callable_raise_none_api_exception_strict_mode_off(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_stt_api:
            # arrange: mock stt.recognize()
            mock_stt_api.return_value.recognize.side_effect = Exception('raise general exception.')
            stt = self._make_stt(strict_mode=False)
            for value in ['none_exist.wav', 'invalid.wav']:
                # act
                response = stt(value)
                # assert: the error is reported in-band as a JSON string
                assert 'error_message' in response
                assert 'raise general exception.' in response
            assert stt.strict_mode is False

    def test_service_callable_raise_api_exception_strict_mode_on(self):
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_stt_api:
            # arrange: mock stt.recognize()
            mock_stt_api.return_value.recognize.side_effect = ApiException('STT API raise exception.')
            stt = self._make_stt(strict_mode=True)
            for value in ['none_exist.wav', 'invalid.wav']:
                # act
                response = stt(value)
                # assert: API errors are reported in-band even in strict mode
                assert 'api_error_message' in response
                assert 'STT API raise exception.' in response
            assert stt.strict_mode is True

    def test_service_callable_raise_api_exception_strict_mode_off(self):
        # this test used to reuse the "strict_mode_on" name, which silently
        # replaced the test above in the class namespace
        with mock.patch(self.AUTH_TARGET), mock.patch(self.API_TARGET) as mock_stt_api:
            # arrange: mock stt.recognize()
            mock_stt_api.return_value.recognize.side_effect = ApiException('STT API raise exception.')
            stt = self._make_stt(strict_mode=False)
            for value in ['none_exist.wav', 'invalid.wav']:
                # act
                response = stt(value)
                # assert
                assert 'api_error_message' in response
                assert 'STT API raise exception.' in response
            assert stt.strict_mode is False

    def test_get_new_client(self):
        # arrange
        stt = self._make_stt(strict_mode=False)
        # act
        new_stt = stt.get_new_client()
        # assert
        assert stt.token == new_stt.token
        assert stt.endpoint == new_stt.endpoint
        assert 'features' in new_stt.params
        assert new_stt.params['features'] == 'foo'
        assert new_stt is not stt
        # compare the copy against the source instead of the source against itself
        assert new_stt.strict_mode == stt.strict_mode
        assert new_stt.strict_mode is False

    def test_reader_raise_exception(self):
        # arrange
        reader = mock.MagicMock(side_effect=Exception('failed to read the file.'))
        stt = self._make_stt(reader=reader)
        # act
        with pytest.raises(Exception) as exinfo:
            stt('sample.wav')
        # assert
        assert 'failed to read the file.' in str(exinfo.value)

    def test_reader_return_none_stream(self):
        # arrange
        reader = mock.MagicMock(return_value = None)
        stt = self._make_stt(reader=reader)
        # act
        response = stt('sample.wav')
        # assert
        assert response is None
186 |
187 |
188 |
189 |
190 |
191 |
--------------------------------------------------------------------------------
/test/test_json_transformer.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " test JSON transformer class
4 | "
5 | """
6 | import pytest
7 | from unittest import mock
8 | from watson_transformer.json_transformer import JSONTransformer
9 |
10 |
11 |
@pytest.fixture(scope='function')
def mock_service(request):
    """
    Factory fixture: the returned zero-argument callable hands out one shared
    MagicMock whose call result is 'foo response'.
    """
    mocked = mock.MagicMock(return_value='foo response')

    def provide():
        return mocked

    return provide
17 |
18 |
class TestJSONTransformer():
    """Constructor-level unit tests for JSONTransformer."""

    def test_init_input_col_with__valid_value(self, mock_service):
        # arrange
        mocked_service = mock_service()
        for column_name in ["foo", "foo bar", "foo_bar", " _"]:
            # act
            transformer = JSONTransformer(inputCol=column_name,
                                          outputCol='output_column',
                                          removeInputCol=True,
                                          parser=mocked_service)
            # assert
            assert transformer.getInputCol() == column_name

    def test_init_input_col_with_invalid_value(self, mock_service):
        # arrange
        mocked_service = mock_service()
        for column_name in [None, "", " ", " "]:
            # act
            with pytest.raises(ValueError) as exception:
                JSONTransformer(inputCol=column_name,
                                outputCol='output_column',
                                removeInputCol=True,
                                parser=mocked_service)
            # assert
            assert "input column name" in str(exception.value)

    def test_init_output_col_with_valid_value(self, mock_service):
        # arrange
        mocked_service = mock_service()
        for column_name in ["foo", "foo bar", "foo_bar", " _"]:
            # act: the candidate name goes to outputCol, the parameter under test
            transformer = JSONTransformer(inputCol='input_column',
                                          outputCol=column_name,
                                          removeInputCol=True,
                                          parser=mocked_service)
            # assert
            # NOTE(review): assumes JSONTransformer exposes getOutputCol (HasOutputCol) -- confirm
            assert transformer.getOutputCol() == column_name

    def test_init_output_col_with_invalid_value(self, mock_service):
        # arrange
        mocked_service = mock_service()
        for column_name in [None, "", " ", " "]:
            # act
            with pytest.raises(ValueError) as exception:
                JSONTransformer(inputCol='input_column',
                                outputCol=column_name,
                                removeInputCol=True,
                                parser=mocked_service)
            # assert
            assert "output column name" in str(exception.value)

    def test_init_remove_input_column_with_valid_value(self, mock_service):
        # arrange
        mocked_service = mock_service()
        valid_values = [0, 1, True, False]
        expect_values = [False, True, True, False]
        for given, expected in zip(valid_values, expect_values):
            # act
            transformer = JSONTransformer(inputCol='input_column',
                                          outputCol='output_column',
                                          removeInputCol=given,
                                          parser=mocked_service)
            # assert (equality on purpose: 0/1 count as False/True here)
            assert transformer.getRemoveInputCol() == expected

    def test_init_remove_input_column_with_default_value(self, mock_service):
        # arrange
        mocked_service = mock_service()
        # act
        transformer = JSONTransformer(inputCol='input column',
                                      outputCol='output column',
                                      parser=mocked_service)
        # assert
        assert transformer.getRemoveInputCol() == False

    def test_init_valid_service(self):
        # arrange
        mocked_service = lambda x: x+1
        # act
        transformer = JSONTransformer(inputCol='input column',
                                      outputCol='output column',
                                      parser=mocked_service)
        # assert
        provided_service = transformer.getParser()
        assert provided_service(10) == 11

    def test_init_none_callable_service(self):
        # arrange
        invalid_services = [None, 12, "12"]
        for invalid_service in invalid_services:
            # act
            with pytest.raises(ValueError) as exinfo:
                JSONTransformer(inputCol='input column',
                                outputCol='output column',
                                parser=invalid_service)
            # assert
            message = str(exinfo.value)
            assert "parser instance" in message and "callable" in message
--------------------------------------------------------------------------------
/test/test_watson_service_transformer.py:
--------------------------------------------------------------------------------
1 | """
2 | "
3 | " test watson service transformer class
4 | "
5 | """
6 | import pytest
7 | from unittest import mock
8 | from watson_transformer.watson_service_transformer import WatsonServiceTransformer
9 |
10 |
11 |
@pytest.fixture(scope='function')
def mock_service(request):
    """
    Factory fixture: the returned zero-argument callable hands out one shared
    MagicMock whose call result is 'foo response'.
    """
    mocked = mock.MagicMock(return_value='foo response')

    def provide():
        return mocked

    return provide
17 |
18 |
class TestWatsonServiceTransformer():
    """Constructor-argument tests for ``WatsonServiceTransformer``.

    Each test builds a transformer while varying a single constructor
    argument, then checks either the value reported by the matching getter
    or the ``ValueError`` message raised for a rejected value.
    """

    def test_init_input_col_with__valid_value(self, mock_service):
        """Accepted input column names are reported back by ``getInputCol``."""
        # arrange
        mocked_service = mock_service()
        # act
        for column_name in ["foo", "foo bar", "foo_bar", " _"]:
            transformer = WatsonServiceTransformer(inputCol=column_name,
                                                   outputCol='output_column',
                                                   vectorization=True,
                                                   max_workers=10,
                                                   service=mocked_service)
            # assert
            assert transformer.getInputCol() == column_name

    def test_init_input_col_with_invalid_value(self, mock_service):
        """Rejected input column names raise ``ValueError`` naming the input column."""
        # arrange
        mocked_service = mock_service()
        # act
        for column_name in [None, "", " ", " "]:
            with pytest.raises(ValueError) as exception:
                WatsonServiceTransformer(inputCol=column_name,
                                         outputCol='output_column',
                                         vectorization=True,
                                         max_workers=10,
                                         service=mocked_service)
            # assert -- placed after the ``with`` block: code following the
            # raising call inside the block would never execute.
            assert "input column name" in str(exception.value)

    def test_init_output_col_with_valid_value(self, mock_service):
        """Accepted output column names are reported back by ``getOutputCol``."""
        # arrange
        mocked_service = mock_service()
        # act
        for column_name in ["foo", "foo bar", "foo_bar", " _"]:
            transformer = WatsonServiceTransformer(inputCol='input column',
                                                   outputCol=column_name,
                                                   vectorization=True,
                                                   max_workers=10,
                                                   service=mocked_service)
            # assert
            assert transformer.getOutputCol() == column_name

    def test_init_output_col_with_invalid_value(self, mock_service):
        """Rejected output column names raise ``ValueError`` naming the output column."""
        # arrange
        mocked_service = mock_service()
        # act
        for column_name in [None, "", " ", " "]:
            with pytest.raises(ValueError) as exception:
                WatsonServiceTransformer(inputCol="input column",
                                         outputCol=column_name,
                                         vectorization=True,
                                         max_workers=10,
                                         service=mocked_service)
            # assert -- placed after the ``with`` block: code following the
            # raising call inside the block would never execute.
            assert "output column name" in str(exception.value)

    def test_init_max_workers_with_valid_value(self, mock_service):
        """Positive integer ``max_workers`` values are reported by ``getMax_workers``."""
        # arrange
        mocked_service = mock_service()
        max_workers = [1, 2, 3, 4, 5]
        expect_max_workers = [1, 2, 3, 4, 5]
        # act
        for workers, expected in zip(max_workers, expect_max_workers):
            transformer = WatsonServiceTransformer(inputCol='input column',
                                                   outputCol='output column',
                                                   vectorization=True,
                                                   max_workers=workers,
                                                   service=mocked_service)
            # assert
            assert transformer.getMax_workers() == expected

    def test_init_max_workers_with_default_value(self, mock_service):
        """Omitting ``max_workers`` yields the expected default of 5."""
        # arrange
        mocked_service = mock_service()
        expected_default_value = 5
        # act
        transformer = WatsonServiceTransformer(inputCol='input column',
                                               outputCol='output column',
                                               vectorization=True,
                                               service=mocked_service)
        # assert
        assert transformer.getMax_workers() == expected_default_value

    def test_init_max_workers_with_invalid_value(self, mock_service):
        """``None``, string, zero and negative ``max_workers`` raise ``ValueError``."""
        # arrange
        mocked_service = mock_service()
        max_workers = [None, "1", -1, 0, -100]
        # act
        for workers in max_workers:
            with pytest.raises(ValueError) as exinfo:
                WatsonServiceTransformer(inputCol='input column',
                                         outputCol='output column',
                                         vectorization=True,
                                         max_workers=workers,
                                         service=mocked_service)
            # assert -- checked per rejected value, after the ``with`` block.
            assert "maximum workers" in str(exinfo.value)

    def test_init_vectorization_with_valid_value(self, mock_service):
        """0/1 and boolean ``vectorization`` values compare equal to the expected flag."""
        # arrange
        mocked_service = mock_service()
        vectorizations = [0, 1, True, False]
        expect_vectorizations = [False, True, True, False]
        # act
        for vectorization, expected in zip(vectorizations, expect_vectorizations):
            transformer = WatsonServiceTransformer(inputCol='input column',
                                                   outputCol='output column',
                                                   vectorization=vectorization,
                                                   max_workers=10,
                                                   service=mocked_service)
            # assert
            assert transformer.getVectorization() == expected

    def test_init_vectorization_with_default_value(self, mock_service):
        """Omitting ``vectorization`` yields a value equal to ``False``."""
        # arrange
        mocked_service = mock_service()
        # act
        transformer = WatsonServiceTransformer(inputCol='input column',
                                               outputCol='output column',
                                               service=mocked_service)
        # assert -- equality rather than identity, matching the comparison
        # used in the valid-value test above.
        assert transformer.getVectorization() == False  # noqa: E712

    def test_init_valid_service(self):
        """A callable passed as ``service`` is returned by ``getService`` and still works."""
        # arrange
        def add_one(x):
            return x + 1

        # act
        transformer = WatsonServiceTransformer(inputCol='input column',
                                               outputCol='output column',
                                               service=add_one)
        # assert
        provided_service = transformer.getService()
        assert provided_service(10) == 11

    def test_init_none_callable_service(self):
        """Non-callable ``service`` values raise ``ValueError`` mentioning the service."""
        # arrange
        values = [None, 12, "12"]
        # act
        for value in values:
            with pytest.raises(ValueError) as exinfo:
                WatsonServiceTransformer(inputCol='input column',
                                         outputCol='output column',
                                         service=value)
            # assert -- two separate checks so a failure shows which part of
            # the message is missing.
            message = str(exinfo.value)
            assert "service instance" in message
            assert "callable" in message
--------------------------------------------------------------------------------