├── .github └── workflows │ └── unit_test.yml ├── .gitignore ├── README.md ├── docs ├── Makefile ├── _build │ └── html │ │ ├── _sources │ │ ├── api.rst.txt │ │ ├── development.rst.txt │ │ ├── index.rst.txt │ │ ├── intro.rst.txt │ │ └── tutorial.rst.txt │ │ ├── _static │ │ ├── Watson_Tranformer_Design.jpg │ │ ├── Watson_Tranformer_Design.svg │ │ ├── alabaster.css │ │ ├── basic.css │ │ ├── css │ │ │ ├── badge_only.css │ │ │ ├── fonts │ │ │ │ ├── Roboto-Slab-Bold.woff │ │ │ │ ├── Roboto-Slab-Bold.woff2 │ │ │ │ ├── Roboto-Slab-Regular.woff │ │ │ │ ├── Roboto-Slab-Regular.woff2 │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.svg │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ ├── fontawesome-webfont.woff2 │ │ │ │ ├── lato-bold-italic.woff │ │ │ │ ├── lato-bold-italic.woff2 │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-normal-italic.woff │ │ │ │ ├── lato-normal-italic.woff2 │ │ │ │ ├── lato-normal.woff │ │ │ │ └── lato-normal.woff2 │ │ │ └── theme.css │ │ ├── custom.css │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── file.png │ │ ├── fonts │ │ │ ├── FontAwesome.otf │ │ │ ├── Lato │ │ │ │ ├── lato-bold.eot │ │ │ │ ├── lato-bold.ttf │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ ├── lato-bolditalic.woff2 │ │ │ │ ├── lato-italic.eot │ │ │ │ ├── lato-italic.ttf │ │ │ │ ├── lato-italic.woff │ │ │ │ ├── lato-italic.woff2 │ │ │ │ ├── lato-regular.eot │ │ │ │ ├── lato-regular.ttf │ │ │ │ ├── lato-regular.woff │ │ │ │ └── lato-regular.woff2 │ │ │ ├── Roboto-Slab-Bold.woff │ │ │ ├── Roboto-Slab-Bold.woff2 │ │ │ ├── Roboto-Slab-Light.woff │ │ │ ├── Roboto-Slab-Light.woff2 │ │ │ ├── Roboto-Slab-Regular.woff │ │ │ ├── Roboto-Slab-Regular.woff2 │ │ │ ├── Roboto-Slab-Thin.woff │ │ │ ├── Roboto-Slab-Thin.woff2 │ │ │ ├── RobotoSlab │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.svg │ │ │ ├── fontawesome-webfont.ttf │ │ │ ├── fontawesome-webfont.woff │ │ │ ├── fontawesome-webfont.woff2 │ │ │ ├── lato-bold-italic.woff │ │ │ ├── lato-bold-italic.woff2 │ │ │ ├── lato-bold.woff │ │ │ ├── lato-bold.woff2 │ │ │ ├── lato-normal-italic.woff │ │ │ ├── lato-normal-italic.woff2 │ │ │ ├── lato-normal.woff │ │ │ └── lato-normal.woff2 │ │ ├── jquery-3.5.1.js │ │ ├── jquery.js │ │ ├── js │ │ │ ├── badge_only.js │ │ │ ├── html5shiv-printshiv.min.js │ │ │ ├── html5shiv.min.js │ │ │ ├── modernizr.min.js │ │ │ └── theme.js │ │ ├── language_data.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── underscore-1.3.1.js │ │ ├── underscore.js │ │ ├── watson_transformer_logo.png │ │ ├── watson_transformer_logo.svg │ │ └── watson_transformer_stt_perf.png │ │ ├── api.html │ │ ├── development.html │ │ ├── genindex.html │ │ ├── index.html │ │ ├── intro.html │ │ ├── objects.inv │ │ ├── search.html │ │ ├── searchindex.js │ │ └── tutorial.html ├── api.rst ├── conf.py ├── development.rst ├── index.rst ├── intro.rst ├── make.bat ├── misc │ ├── Watson_Tranformer_Design.drawio │ ├── Watson_Tranformer_Design.jpg │ ├── Watson_Tranformer_Design.svg │ ├── pipleline_benchmark.png │ ├── 
regular_udf_vs_vectorized_udf_.png │ ├── watson_transformer_logo.svg │ ├── watson_transformer_logo_design.drawio │ ├── watson_transformer_perf_full_pipeline.png │ └── watson_transformer_stt_perf.png └── tutorial.rst ├── requirements.txt ├── setup.py ├── src └── watson_transformer │ ├── __init__.py │ ├── contrib │ ├── __init__.py │ ├── nlu │ │ ├── __init__.py │ │ └── default_nlu_parser.py │ ├── readers.py │ ├── response_base.py │ └── stt │ │ ├── __init__.py │ │ └── default_stt_parser.py │ ├── flat_column_transformer.py │ ├── json_transformer.py │ ├── service │ ├── __init__.py │ ├── nlu.py │ ├── service_base.py │ └── stt.py │ └── watson_service_transformer.py └── test ├── README.md ├── contrib ├── nlu │ └── test_default_nlu_parser.py ├── stt │ └── test_default_stt_parser.py └── test_response_base.py ├── service ├── test_nlu.py ├── test_service_base.py └── test_stt.py ├── test_json_transformer.py └── test_watson_service_transformer.py /.github/workflows/unit_test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: unit-test 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [3.6, 3.7] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v1 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install -r requirements.txt 30 | pip install pyspark 31 | pip install pandas 32 | pip install pytest 33 | pip install pytest-cov 34 | pip install -e . 35 | - name: Test with pytest 36 | run: | 37 | pip install pytest 38 | pytest --cov=src 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | !/.gitignore 3 | !/.travis.yml 4 | /example/* 5 | /dist/* 6 | /build/* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Watson Transformer 2 | 3 | [![Actions Status](https://github.com/kai-niu/watson-transformer/workflows/unit-test/badge.svg)](https://github.com/kai-niu/watson-transformer/actions) 4 | 5 | Watson Transformer solves the problem of consuming IBM Watson API services ([STT](https://cloud.ibm.com/apidocs/speech-to-text), [NLU](https://cloud.ibm.com/apidocs/natural-language-understanding), etc.) at scale by wrapping the service calls into Spark transformers. In this way, IBM services such as STT and NLU can be built into the Spark ML pipeline, along with other transformers and estimators, to tackle the big data challenge. 6 | 7 | # Install 8 | ``` 9 | pip install watson-transformer 10 | ``` 11 | 12 | # Design 13 | As the UML chart illustrates, the Watson Transformer Class serves as a thin wrapper around the IBM Watson API class. For extensibility purposes, the logic of consuming the API service is defined in the Watson Service Class, which is an executable class. It enables any applicable API service to be wrapped into the transformer. 
On the other hand, the transformer handles mapping input data to API calls and parsing the service response into data fields. 14 | 15 | 16 | 17 | # Performance 18 | 19 | * __Experiment 1__: This experiment compares the performance of the regular UDF and the vectorized UDF with pyArrow enabled. The testing cluster is provisioned with 10 2vCPU/2GB nodes, and the time cost is recorded on datasets containing [100, 200, 400, 800, 1600] recordings respectively. The maximum number of worker threads a vectorized UDF can spawn is 10; therefore, the maximum QPS (queries/sec) of the vectorized UDF transformer is 200. 20 | 21 | 22 | 23 | The results suggest: 24 | 1. Vectorized UDF: the time complexity is between **O(0.001N)** and **O(0.005N)**, *N = total recording seconds in the dataset* 25 | 2. Regular UDF: the time complexity is slower than **O(0.01N)**, *N = total recording seconds in the dataset* 26 | 3. The vectorized UDF is more than **10x** faster than the regular UDF; one clock second can process **~400** recording seconds. 27 | 28 |
29 |
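For reference, the sketch below shows the difference between the two UDF styles being compared. It is a minimal illustration rather than the package's implementation: `transcribe` is a hypothetical stand-in for a Watson STT request, and the Arrow setting uses the Spark 3.x key (`spark.sql.execution.arrow.enabled` on Spark 2.x).

```
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf, pandas_udf
from pyspark.sql.types import StringType

spark = (SparkSession.builder
         .config("spark.sql.execution.arrow.pyspark.enabled", "true")  # pyArrow-backed batches
         .getOrCreate())

def transcribe(audio_id):
    # hypothetical stand-in for a Watson STT request
    return "transcript of " + audio_id

# Regular UDF: one Python round trip per row.
regular_stt = udf(transcribe, StringType())

# Vectorized (pandas) UDF: each call receives a whole Arrow batch as a pandas Series.
# This sketch applies it serially; batch access is what lets a transformer fan the
# rows of a batch out to worker threads.
@pandas_udf(StringType())
def vectorized_stt(audio_ids: pd.Series) -> pd.Series:
    return audio_ids.apply(transcribe)

df = spark.createDataFrame([("a.wav",), ("b.wav",)], ["audio"])
df.select(regular_stt("audio").alias("regular"),
          vectorized_stt("audio").alias("vectorized")).show(truncate=False)
```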
30 | 31 | * __Experiment 2__: This experiment benchmarks the performance of pySpark ML pipelines built using several transformers provided by this package. The testing cluster is provisioned the same way as in the first experiment. The maximum QPS of the STT and NLU transformers is 200. Here is the configuration of the two ML pipelines: 32 | * STT pipeline: [STT => JSON Transformer] 33 | * STT + NLU pipeline: [STT => JSON Transformer => NLU => JSON Transformer => Nested Column Transformer] 34 | 35 | 36 | 37 | The results suggest: 38 | 1. The STT transformer dominates the time cost of the whole pipeline. 39 | 2. The time complexity of both pipelines is between **O(0.005N)** and **O(0.01N)**, *N = total recording seconds in the dataset* 40 | 3. One clock second can process **~400** recording seconds. 41 | 42 | 43 | 44 | # Tutorial 45 | 46 | API documentation and tutorials are available [here](https://watson-transformer.readthedocs.io/en/latest/?) 47 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/api.rst.txt: -------------------------------------------------------------------------------- 1 | API Reference 2 | ************** 3 | 4 | This page gives the API reference of the Watson Transformer package. 5 | 6 | WatsonServiceTransformer Class 7 | =============================== 8 | 9 | **WatsonServiceTransformer** (*inputCol=None, outputCol=None, vectorization=False, max_workers=5, service=None*) 10 | 11 | **Base**: 12 | - ``pyspark.ml.pipeline.Transformer`` 13 | - ``pyspark.ml.param.shared.HasInputCol`` 14 | - ``pyspark.ml.param.shared.HasOutputCol`` 15 | - ``pyspark.ml.util.DefaultParamsReadable`` 16 | - ``pyspark.ml.util.DefaultParamsWritable`` 17 | 18 | **Parameters**: 19 | - **inputCol**: The column name used as input data. ``required`` 20 | - **outputCol**: The column name used to output the transformed data. ``required`` 21 | - **vectorization**: Exploit the pyArrow in-memory dataframe. Enabling vectorization whenever possible is recommended. The default value is ``False``. 22 | - **max_workers**: When vectorization is enabled, the maximum number of threads that can be utilized to boost performance. The default value is ``5``. 23 | - **service**: The API service instance that is wrapped by the Watson Transformer. 
``required`` 24 | 25 | **Return**: 26 | ``WatsonServiceTransformer`` class instance 27 | 28 | **Return Type**: 29 | ``pyspark.ml.pipeline.Transformer`` 30 | 31 | ----------------------------- 32 | 33 | **- transform** (*dataframe*) 34 | 35 | **Parameters**: 36 | - **dataframe**: the pySpark dataframe receiving the transformation 37 | 38 | **Return**: 39 | pySpark dataframe containing the transformation result 40 | 41 | **Return Type**: 42 | ``pyspark.sql.DataFrame`` 43 | 44 | 45 | .. note:: 46 | 47 | The ``WatsonServiceTransformer`` is a custom implementation of a pySpark transformer. The functions which are inherited from the 48 | pySpark transformer base classes have been implemented and are thus available for use. 49 | 50 | 51 | FlatColumnTransformer Class 52 | ============================ 53 | 54 | **FlatColumnTransformer** (*inputCol=None*) 55 | 56 | **Base**: 57 | - ``pyspark.ml.pipeline.Transformer`` 58 | - ``pyspark.ml.param.shared.HasInputCol`` 59 | - ``pyspark.ml.util.DefaultParamsReadable`` 60 | - ``pyspark.ml.util.DefaultParamsWritable`` 61 | 62 | **Parameters**: 63 | - **inputCol**: The column name used as input data. ``required`` 64 | 65 | **Return**: 66 | ``FlatColumnTransformer`` class instance 67 | 68 | **Return Type**: 69 | ``pyspark.ml.pipeline.Transformer`` 70 | 71 | ----------------------------- 72 | 73 | **- transform** (*dataframe*) 74 | 75 | **Parameters**: 76 | - **dataframe**: the pySpark dataframe receiving the transformation 77 | 78 | **Return**: 79 | pySpark dataframe containing the flattened data from the input column 80 | 81 | **Return Type**: 82 | ``pyspark.sql.DataFrame`` 83 | 84 | 85 | .. note:: 86 | 87 | The ``FlatColumnTransformer`` is a custom implementation of a pySpark transformer. The functions which are inherited from the 88 | pySpark transformer base classes have been implemented and are thus available for use. This transformer 89 | flattens the nested input column into multiple regular data columns. 90 | 91 | 92 | JSONTransformer Class 93 | ===================== 94 | 95 | **JSONTransformer** (*inputCol=None, outputCol=None, removeInputCol=False, parser=None*) 96 | 97 | **Base**: 98 | - ``pyspark.ml.pipeline.Transformer`` 99 | - ``pyspark.ml.param.shared.HasInputCol`` 100 | - ``pyspark.ml.param.shared.HasOutputCol`` 101 | - ``pyspark.ml.util.DefaultParamsReadable`` 102 | - ``pyspark.ml.util.DefaultParamsWritable`` 103 | 104 | **Parameters**: 105 | - **inputCol**: The column name used as input data. ``required`` 106 | - **outputCol**: The column name used to output the transformed data. ``required`` 107 | - **removeInputCol**: Whether or not to remove the input column from the output dataframe. The default value is ``False``. 108 | - **parser**: The object that parses JSON data into data column(s). ``required`` 109 | 110 | **Return**: 111 | ``JSONTransformer`` class instance 112 | 113 | **Return Type**: 114 | ``pyspark.ml.pipeline.Transformer`` 115 | 116 | ----------------------------- 117 | 118 | **- transform** (*dataframe*) 119 | 120 | **Parameters**: 121 | - **dataframe**: the pySpark dataframe receiving the transformation 122 | 123 | **Return**: 124 | pySpark dataframe containing the transformation result 125 | 126 | **Return Type**: 127 | ``pyspark.sql.DataFrame`` 128 | 129 | 130 | .. note:: 131 | 132 | The ``JSONTransformer`` is a custom implementation of a pySpark transformer. The functions which are inherited from the 133 | pySpark transformer base classes have been implemented and are thus available for use. 
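Usage Example
=============

A minimal usage sketch tying the classes on this page together. The import paths are assumed from the package layout, and ``stt_service`` / ``stt_parser`` are placeholders for a configured API service object and response parser (see the tutorial for the shipped implementations); only the constructor parameters shown above are taken from this reference.

::

    from pyspark.sql import SparkSession
    from pyspark.ml import Pipeline
    # import paths assumed from the package layout
    from watson_transformer.watson_service_transformer import WatsonServiceTransformer
    from watson_transformer.json_transformer import JSONTransformer

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([('path/to/audio.wav',)], ['audio'])

    stt_service = ...   # placeholder: a configured STT service instance (token, endpoint, ...)
    stt_parser = ...    # placeholder: an object that parses the STT JSON response into columns

    stt = WatsonServiceTransformer(inputCol='audio', outputCol='stt_json',
                                   vectorization=True, max_workers=5,
                                   service=stt_service)
    parse = JSONTransformer(inputCol='stt_json', outputCol='transcript',
                            removeInputCol=True, parser=stt_parser)

    df_out = Pipeline(stages=[stt, parse]).fit(df).transform(df)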
134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/development.rst.txt: -------------------------------------------------------------------------------- 1 | Development 2 | ============= 3 | 4 | To set up the development environment and extend Watson Transformer support to other APIs .... 5 | 6 | 7 | Install Package in Dev Mode 8 | --------------------------- 9 | 10 | :: 11 | 12 | pip install watson-transformer -------------------------------------------------------------------------------- /docs/_build/html/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. watson transformer documentation master file, created by 2 | sphinx-quickstart on Mon Jul 27 22:03:12 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to watson transformer's documentation! 7 | ============================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 3 11 | :caption: Contents: 12 | 13 | intro 14 | tutorial 15 | api 16 | development 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/_build/html/_sources/intro.rst.txt: -------------------------------------------------------------------------------- 1 | What is Watson Transformer? 2 | =========================== 3 | Watson Transformer solves the problem of consuming 4 | IBM Watson API services (STT, NLU, etc.) at scale by 5 | wrapping the service calls into Spark transformers. 6 | In this way, IBM services such as STT and NLU can 7 | be built into the Spark ML pipeline, along with 8 | other transformers and estimators to tackle the big 9 | data challenge. 10 | 11 | The Design 12 | ---------- 13 | As the UML chart illustrates, the Watson Transformer 14 | Class serves as a thin wrapper around the IBM Watson 15 | API class. For extensibility purposes, the logic of 16 | consuming the API service is defined in the Watson Service 17 | Class, which is an executable class. It enables any 18 | applicable API service to be wrapped into the transformer. 19 | On the other hand, the transformer handles mapping input 20 | data to API calls and parsing the service response into 21 | data fields. -------------------------------------------------------------------------------- /docs/_build/html/_static/Watson_Tranformer_Design.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/Watson_Tranformer_Design.jpg -------------------------------------------------------------------------------- /docs/_build/html/_static/Watson_Tranformer_Design.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 
[Watson_Tranformer_Design.svg is a draw.io-exported diagram; only its embedded text labels survive extraction: «executable object» Return Type Factory · «executable object» Response Parser · API Response => Data Fields · Instantiate Return Type Object · Watson Transformer Class (+ inputCol: string, + outputCol: string, + service: Watson Service Class, + transfrom(dataframe): dataframe) · Watson Service Class (+ token: string, + endpoint: string, + return_type: object, + response_parser: object, + **params: kv pairs, + __call__(object): return_type) · fallback text: "Viewer does not support full SVG 1.1"]
-------------------------------------------------------------------------------- /docs/_build/html/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} 
-------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- 
/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-normal-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-normal.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-normal.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/css/fonts/lato-normal.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* This file intentionally left blank. */ 2 | -------------------------------------------------------------------------------- /docs/_build/html/_static/doctools.js: -------------------------------------------------------------------------------- 1 | /* 2 | * doctools.js 3 | * ~~~~~~~~~~~ 4 | * 5 | * Sphinx JavaScript utilities for all documentation. 6 | * 7 | * :copyright: Copyright 2007-2020 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 
9 | * 10 | */ 11 | 12 | /** 13 | * select a different prefix for underscore 14 | */ 15 | $u = _.noConflict(); 16 | 17 | /** 18 | * make the code below compatible with browsers without 19 | * an installed firebug like debugger 20 | if (!window.console || !console.firebug) { 21 | var names = ["log", "debug", "info", "warn", "error", "assert", "dir", 22 | "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace", 23 | "profile", "profileEnd"]; 24 | window.console = {}; 25 | for (var i = 0; i < names.length; ++i) 26 | window.console[names[i]] = function() {}; 27 | } 28 | */ 29 | 30 | /** 31 | * small helper function to urldecode strings 32 | */ 33 | jQuery.urldecode = function(x) { 34 | return decodeURIComponent(x).replace(/\+/g, ' '); 35 | }; 36 | 37 | /** 38 | * small helper function to urlencode strings 39 | */ 40 | jQuery.urlencode = encodeURIComponent; 41 | 42 | /** 43 | * This function returns the parsed url parameters of the 44 | * current request. Multiple values per key are supported, 45 | * it will always return arrays of strings for the value parts. 46 | */ 47 | jQuery.getQueryParameters = function(s) { 48 | if (typeof s === 'undefined') 49 | s = document.location.search; 50 | var parts = s.substr(s.indexOf('?') + 1).split('&'); 51 | var result = {}; 52 | for (var i = 0; i < parts.length; i++) { 53 | var tmp = parts[i].split('=', 2); 54 | var key = jQuery.urldecode(tmp[0]); 55 | var value = jQuery.urldecode(tmp[1]); 56 | if (key in result) 57 | result[key].push(value); 58 | else 59 | result[key] = [value]; 60 | } 61 | return result; 62 | }; 63 | 64 | /** 65 | * highlight a given string on a jquery object by wrapping it in 66 | * span elements with the given class name. 67 | */ 68 | jQuery.fn.highlightText = function(text, className) { 69 | function highlight(node, addItems) { 70 | if (node.nodeType === 3) { 71 | var val = node.nodeValue; 72 | var pos = val.toLowerCase().indexOf(text); 73 | if (pos >= 0 && 74 | !jQuery(node.parentNode).hasClass(className) && 75 | !jQuery(node.parentNode).hasClass("nohighlight")) { 76 | var span; 77 | var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); 78 | if (isInSVG) { 79 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); 80 | } else { 81 | span = document.createElement("span"); 82 | span.className = className; 83 | } 84 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 85 | node.parentNode.insertBefore(span, node.parentNode.insertBefore( 86 | document.createTextNode(val.substr(pos + text.length)), 87 | node.nextSibling)); 88 | node.nodeValue = val.substr(0, pos); 89 | if (isInSVG) { 90 | var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect"); 91 | var bbox = node.parentElement.getBBox(); 92 | rect.x.baseVal.value = bbox.x; 93 | rect.y.baseVal.value = bbox.y; 94 | rect.width.baseVal.value = bbox.width; 95 | rect.height.baseVal.value = bbox.height; 96 | rect.setAttribute('class', className); 97 | addItems.push({ 98 | "parent": node.parentNode, 99 | "target": rect}); 100 | } 101 | } 102 | } 103 | else if (!jQuery(node).is("button, select, textarea")) { 104 | jQuery.each(node.childNodes, function() { 105 | highlight(this, addItems); 106 | }); 107 | } 108 | } 109 | var addItems = []; 110 | var result = this.each(function() { 111 | highlight(this, addItems); 112 | }); 113 | for (var i = 0; i < addItems.length; ++i) { 114 | jQuery(addItems[i].parent).before(addItems[i].target); 115 | } 116 | return result; 117 | }; 118 | 119 | /* 120 | * backward 
compatibility for jQuery.browser 121 | * This will be supported until firefox bug is fixed. 122 | */ 123 | if (!jQuery.browser) { 124 | jQuery.uaMatch = function(ua) { 125 | ua = ua.toLowerCase(); 126 | 127 | var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || 128 | /(webkit)[ \/]([\w.]+)/.exec(ua) || 129 | /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || 130 | /(msie) ([\w.]+)/.exec(ua) || 131 | ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || 132 | []; 133 | 134 | return { 135 | browser: match[ 1 ] || "", 136 | version: match[ 2 ] || "0" 137 | }; 138 | }; 139 | jQuery.browser = {}; 140 | jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; 141 | } 142 | 143 | /** 144 | * Small JavaScript module for the documentation. 145 | */ 146 | var Documentation = { 147 | 148 | init : function() { 149 | this.fixFirefoxAnchorBug(); 150 | this.highlightSearchWords(); 151 | this.initIndexTable(); 152 | if (DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) { 153 | this.initOnKeyListeners(); 154 | } 155 | }, 156 | 157 | /** 158 | * i18n support 159 | */ 160 | TRANSLATIONS : {}, 161 | PLURAL_EXPR : function(n) { return n === 1 ? 0 : 1; }, 162 | LOCALE : 'unknown', 163 | 164 | // gettext and ngettext don't access this so that the functions 165 | // can safely bound to a different name (_ = Documentation.gettext) 166 | gettext : function(string) { 167 | var translated = Documentation.TRANSLATIONS[string]; 168 | if (typeof translated === 'undefined') 169 | return string; 170 | return (typeof translated === 'string') ? translated : translated[0]; 171 | }, 172 | 173 | ngettext : function(singular, plural, n) { 174 | var translated = Documentation.TRANSLATIONS[singular]; 175 | if (typeof translated === 'undefined') 176 | return (n == 1) ? singular : plural; 177 | return translated[Documentation.PLURALEXPR(n)]; 178 | }, 179 | 180 | addTranslations : function(catalog) { 181 | for (var key in catalog.messages) 182 | this.TRANSLATIONS[key] = catalog.messages[key]; 183 | this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')'); 184 | this.LOCALE = catalog.locale; 185 | }, 186 | 187 | /** 188 | * add context elements like header anchor links 189 | */ 190 | addContextElements : function() { 191 | $('div[id] > :header:first').each(function() { 192 | $('\u00B6'). 193 | attr('href', '#' + this.id). 194 | attr('title', _('Permalink to this headline')). 195 | appendTo(this); 196 | }); 197 | $('dt[id]').each(function() { 198 | $('\u00B6'). 199 | attr('href', '#' + this.id). 200 | attr('title', _('Permalink to this definition')). 201 | appendTo(this); 202 | }); 203 | }, 204 | 205 | /** 206 | * workaround a firefox stupidity 207 | * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075 208 | */ 209 | fixFirefoxAnchorBug : function() { 210 | if (document.location.hash && $.browser.mozilla) 211 | window.setTimeout(function() { 212 | document.location.href += ''; 213 | }, 10); 214 | }, 215 | 216 | /** 217 | * highlight the search words provided in the url in the text 218 | */ 219 | highlightSearchWords : function() { 220 | var params = $.getQueryParameters(); 221 | var terms = (params.highlight) ? 
params.highlight[0].split(/\s+/) : []; 222 | if (terms.length) { 223 | var body = $('div.body'); 224 | if (!body.length) { 225 | body = $('body'); 226 | } 227 | window.setTimeout(function() { 228 | $.each(terms, function() { 229 | body.highlightText(this.toLowerCase(), 'highlighted'); 230 | }); 231 | }, 10); 232 | $('') 234 | .appendTo($('#searchbox')); 235 | } 236 | }, 237 | 238 | /** 239 | * init the domain index toggle buttons 240 | */ 241 | initIndexTable : function() { 242 | var togglers = $('img.toggler').click(function() { 243 | var src = $(this).attr('src'); 244 | var idnum = $(this).attr('id').substr(7); 245 | $('tr.cg-' + idnum).toggle(); 246 | if (src.substr(-9) === 'minus.png') 247 | $(this).attr('src', src.substr(0, src.length-9) + 'plus.png'); 248 | else 249 | $(this).attr('src', src.substr(0, src.length-8) + 'minus.png'); 250 | }).css('display', ''); 251 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) { 252 | togglers.click(); 253 | } 254 | }, 255 | 256 | /** 257 | * helper function to hide the search marks again 258 | */ 259 | hideSearchWords : function() { 260 | $('#searchbox .highlight-link').fadeOut(300); 261 | $('span.highlighted').removeClass('highlighted'); 262 | }, 263 | 264 | /** 265 | * make the url absolute 266 | */ 267 | makeURL : function(relativeURL) { 268 | return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL; 269 | }, 270 | 271 | /** 272 | * get the current relative url 273 | */ 274 | getCurrentURL : function() { 275 | var path = document.location.pathname; 276 | var parts = path.split(/\//); 277 | $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() { 278 | if (this === '..') 279 | parts.pop(); 280 | }); 281 | var url = parts.join('/'); 282 | return path.substring(url.lastIndexOf('/') + 1, path.length - 1); 283 | }, 284 | 285 | initOnKeyListeners: function() { 286 | $(document).keydown(function(event) { 287 | var activeElementType = document.activeElement.tagName; 288 | // don't navigate when in search box or textarea 289 | if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT' 290 | && !event.altKey && !event.ctrlKey && !event.metaKey && !event.shiftKey) { 291 | switch (event.keyCode) { 292 | case 37: // left 293 | var prevHref = $('link[rel="prev"]').prop('href'); 294 | if (prevHref) { 295 | window.location.href = prevHref; 296 | return false; 297 | } 298 | case 39: // right 299 | var nextHref = $('link[rel="next"]').prop('href'); 300 | if (nextHref) { 301 | window.location.href = nextHref; 302 | return false; 303 | } 304 | } 305 | } 306 | }); 307 | } 308 | }; 309 | 310 | // quick alias for translations 311 | _ = Documentation.gettext; 312 | 313 | $(document).ready(function() { 314 | Documentation.init(); 315 | }); 316 | -------------------------------------------------------------------------------- /docs/_build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | BUILDER: 'html', 7 | FILE_SUFFIX: '.html', 8 | LINK_SUFFIX: '.html', 9 | HAS_SOURCE: true, 10 | SOURCELINK_SUFFIX: '.txt', 11 | NAVIGATION_WITH_KEYS: false 12 | }; -------------------------------------------------------------------------------- /docs/_build/html/_static/file.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/file.png -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff2 
-------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Roboto-Slab-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Roboto-Slab-Bold.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Roboto-Slab-Light.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Light.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Roboto-Slab-Light.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Light.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Roboto-Slab-Thin.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Thin.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Roboto-Slab-Thin.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/Roboto-Slab-Thin.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/lato-normal-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-normal.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/lato-normal.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/fonts/lato-normal.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/js/badge_only.js: -------------------------------------------------------------------------------- 1 | !function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return 
e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}}); -------------------------------------------------------------------------------- /docs/_build/html/_static/js/html5shiv-printshiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var 
d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/_build/html/_static/js/html5shiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function 
j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/_build/html/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | !function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var 
t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}t.length>0&&($(".wy-menu-vertical .current").removeClass("current"),t.addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l1").parent().addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l2").addClass("current"),t.closest("li.toctree-l3").addClass("current"),t.closest("li.toctree-l4").addClass("current"),t.closest("li.toctree-l5").addClass("current"),t[0].scrollIntoView())}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t2;a== 12 | null&&(a=[]);if(y&&a.reduce===y)return e&&(c=b.bind(c,e)),f?a.reduce(c,d):a.reduce(c);j(a,function(a,b,i){f?d=c.call(e,d,a,b,i):(d=a,f=true)});if(!f)throw new TypeError("Reduce of empty array with no initial value");return d};b.reduceRight=b.foldr=function(a,c,d,e){var f=arguments.length>2;a==null&&(a=[]);if(z&&a.reduceRight===z)return e&&(c=b.bind(c,e)),f?a.reduceRight(c,d):a.reduceRight(c);var g=b.toArray(a).reverse();e&&!f&&(c=b.bind(c,e));return f?b.reduce(g,c,d,e):b.reduce(g,c)};b.find=b.detect= 13 | function(a,c,b){var e;E(a,function(a,g,h){if(c.call(b,a,g,h))return e=a,true});return e};b.filter=b.select=function(a,c,b){var e=[];if(a==null)return e;if(A&&a.filter===A)return a.filter(c,b);j(a,function(a,g,h){c.call(b,a,g,h)&&(e[e.length]=a)});return e};b.reject=function(a,c,b){var e=[];if(a==null)return e;j(a,function(a,g,h){c.call(b,a,g,h)||(e[e.length]=a)});return e};b.every=b.all=function(a,c,b){var e=true;if(a==null)return e;if(B&&a.every===B)return a.every(c,b);j(a,function(a,g,h){if(!(e= 14 | e&&c.call(b,a,g,h)))return n});return e};var E=b.some=b.any=function(a,c,d){c||(c=b.identity);var e=false;if(a==null)return e;if(C&&a.some===C)return a.some(c,d);j(a,function(a,b,h){if(e||(e=c.call(d,a,b,h)))return n});return!!e};b.include=b.contains=function(a,c){var b=false;if(a==null)return b;return p&&a.indexOf===p?a.indexOf(c)!=-1:b=E(a,function(a){return a===c})};b.invoke=function(a,c){var d=i.call(arguments,2);return b.map(a,function(a){return(b.isFunction(c)?c||a:a[c]).apply(a,d)})};b.pluck= 15 | function(a,c){return b.map(a,function(a){return a[c]})};b.max=function(a,c,d){if(!c&&b.isArray(a))return Math.max.apply(Math,a);if(!c&&b.isEmpty(a))return-Infinity;var 
e={computed:-Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;b>=e.computed&&(e={value:a,computed:b})});return e.value};b.min=function(a,c,d){if(!c&&b.isArray(a))return Math.min.apply(Math,a);if(!c&&b.isEmpty(a))return Infinity;var e={computed:Infinity};j(a,function(a,b,h){b=c?c.call(d,a,b,h):a;bd?1:0}),"value")};b.groupBy=function(a,c){var d={},e=b.isFunction(c)?c:function(a){return a[c]};j(a,function(a,b){var c=e(a,b);(d[c]||(d[c]=[])).push(a)});return d};b.sortedIndex=function(a, 17 | c,d){d||(d=b.identity);for(var e=0,f=a.length;e>1;d(a[g])=0})})};b.difference=function(a){var c=b.flatten(i.call(arguments,1));return b.filter(a,function(a){return!b.include(c,a)})};b.zip=function(){for(var a=i.call(arguments),c=b.max(b.pluck(a,"length")),d=Array(c),e=0;e=0;d--)b=[a[d].apply(this,b)];return b[0]}}; 24 | b.after=function(a,b){return a<=0?b():function(){if(--a<1)return b.apply(this,arguments)}};b.keys=J||function(a){if(a!==Object(a))throw new TypeError("Invalid object");var c=[],d;for(d in a)b.has(a,d)&&(c[c.length]=d);return c};b.values=function(a){return b.map(a,b.identity)};b.functions=b.methods=function(a){var c=[],d;for(d in a)b.isFunction(a[d])&&c.push(d);return c.sort()};b.extend=function(a){j(i.call(arguments,1),function(b){for(var d in b)a[d]=b[d]});return a};b.defaults=function(a){j(i.call(arguments, 25 | 1),function(b){for(var d in b)a[d]==null&&(a[d]=b[d])});return a};b.clone=function(a){return!b.isObject(a)?a:b.isArray(a)?a.slice():b.extend({},a)};b.tap=function(a,b){b(a);return a};b.isEqual=function(a,b){return q(a,b,[])};b.isEmpty=function(a){if(b.isArray(a)||b.isString(a))return a.length===0;for(var c in a)if(b.has(a,c))return false;return true};b.isElement=function(a){return!!(a&&a.nodeType==1)};b.isArray=o||function(a){return l.call(a)=="[object Array]"};b.isObject=function(a){return a===Object(a)}; 26 | b.isArguments=function(a){return l.call(a)=="[object Arguments]"};if(!b.isArguments(arguments))b.isArguments=function(a){return!(!a||!b.has(a,"callee"))};b.isFunction=function(a){return l.call(a)=="[object Function]"};b.isString=function(a){return l.call(a)=="[object String]"};b.isNumber=function(a){return l.call(a)=="[object Number]"};b.isNaN=function(a){return a!==a};b.isBoolean=function(a){return a===true||a===false||l.call(a)=="[object Boolean]"};b.isDate=function(a){return l.call(a)=="[object Date]"}; 27 | b.isRegExp=function(a){return l.call(a)=="[object RegExp]"};b.isNull=function(a){return a===null};b.isUndefined=function(a){return a===void 0};b.has=function(a,b){return I.call(a,b)};b.noConflict=function(){r._=G;return this};b.identity=function(a){return a};b.times=function(a,b,d){for(var e=0;e/g,">").replace(/"/g,""").replace(/'/g,"'").replace(/\//g,"/")};b.mixin=function(a){j(b.functions(a), 28 | function(c){K(c,b[c]=a[c])})};var L=0;b.uniqueId=function(a){var b=L++;return a?a+b:b};b.templateSettings={evaluate:/<%([\s\S]+?)%>/g,interpolate:/<%=([\s\S]+?)%>/g,escape:/<%-([\s\S]+?)%>/g};var t=/.^/,u=function(a){return a.replace(/\\\\/g,"\\").replace(/\\'/g,"'")};b.template=function(a,c){var d=b.templateSettings,d="var __p=[],print=function(){__p.push.apply(__p,arguments);};with(obj||{}){__p.push('"+a.replace(/\\/g,"\\\\").replace(/'/g,"\\'").replace(d.escape||t,function(a,b){return"',_.escape("+ 29 | u(b)+"),'"}).replace(d.interpolate||t,function(a,b){return"',"+u(b)+",'"}).replace(d.evaluate||t,function(a,b){return"');"+u(b).replace(/[\r\n\t]/g," ")+";__p.push('"}).replace(/\r/g,"\\r").replace(/\n/g,"\\n").replace(/\t/g,"\\t")+"');}return __p.join('');",e=new 
Function("obj","_",d);return c?e(c,b):function(a){return e.call(this,a,b)}};b.chain=function(a){return b(a).chain()};var m=function(a){this._wrapped=a};b.prototype=m.prototype;var v=function(a,c){return c?b(a).chain():a},K=function(a,c){m.prototype[a]= 30 | function(){var a=i.call(arguments);H.call(a,this._wrapped);return v(c.apply(b,a),this._chain)}};b.mixin(b);j("pop,push,reverse,shift,sort,splice,unshift".split(","),function(a){var b=k[a];m.prototype[a]=function(){var d=this._wrapped;b.apply(d,arguments);var e=d.length;(a=="shift"||a=="splice")&&e===0&&delete d[0];return v(d,this._chain)}});j(["concat","join","slice"],function(a){var b=k[a];m.prototype[a]=function(){return v(b.apply(this._wrapped,arguments),this._chain)}});m.prototype.chain=function(){this._chain= 31 | true;return this};m.prototype.value=function(){return this._wrapped}}).call(this); 32 | -------------------------------------------------------------------------------- /docs/_build/html/_static/watson_transformer_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/watson_transformer_logo.png -------------------------------------------------------------------------------- /docs/_build/html/_static/watson_transformer_stt_perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/_static/watson_transformer_stt_perf.png -------------------------------------------------------------------------------- /docs/_build/html/development.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Development — watson tranfromer documentation 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 |
46 | 47 | 104 | 105 |
106 | 107 | 108 | 114 | 115 | 116 |
117 | 118 |
119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 |
137 | 138 |
    139 | 140 |
  • »
  • 141 | 142 |
  • Development
  • 143 | 144 | 145 |
  • 146 | 147 | 148 | View page source 149 | 150 | 151 |
  • 152 | 153 |
154 | 155 | 156 |
157 |
158 |
159 |
160 | 161 |
162 |

Development

163 |

To set up the development environment and extend Watson Transformer to support other APIs ….

164 |
165 |

Install Package in Dev Mode

166 |
pip install watson-transformer
167 | 
168 |
169 |
170 |
171 | 172 | 173 |
174 | 175 |
176 |
177 | 178 | 184 | 185 | 186 |
187 | 188 |
189 |

190 | 191 | © Copyright 2020, Kai Niu 192 | 193 |

194 |
195 | 196 | 197 | 198 | Built with Sphinx using a 199 | 200 | theme 201 | 202 | provided by Read the Docs. 203 | 204 |
205 | 206 |
207 |
208 | 209 |
210 | 211 |
212 | 213 | 214 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | -------------------------------------------------------------------------------- /docs/_build/html/genindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Index — watson tranfromer documentation 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 |
45 | 46 | 100 | 101 |
102 | 103 | 104 | 110 | 111 | 112 |
113 | 114 |
115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 |
133 | 134 |
    135 | 136 |
  • »
  • 137 | 138 |
  • Index
  • 139 | 140 | 141 |
  • 142 | 143 | 144 | 145 |
  • 146 | 147 |
148 | 149 | 150 |
151 |
152 |
153 |
154 | 155 | 156 |

Index

157 | 158 |
159 | 160 |
161 | 162 | 163 |
164 | 165 |
166 |
167 | 168 | 169 |
170 | 171 |
172 |

173 | 174 | © Copyright 2020, Kai Niu 175 | 176 |

177 |
178 | 179 | 180 | 181 | Built with Sphinx using a 182 | 183 | theme 184 | 185 | provided by Read the Docs. 186 | 187 |
188 | 189 |
190 |
191 | 192 |
193 | 194 |
195 | 196 | 197 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | -------------------------------------------------------------------------------- /docs/_build/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Welcome to watson tranfromer’s documentation! — watson tranfromer documentation 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 |
46 | 47 | 101 | 102 |
103 | 104 | 105 | 111 | 112 | 113 |
114 | 115 |
116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 |
134 | 135 |
    136 | 137 |
  • »
  • 138 | 139 |
  • Welcome to watson transformer’s documentation!
  • 140 | 141 | 142 |
  • 143 | 144 | 145 | View page source 146 | 147 | 148 |
  • 149 | 150 |
151 | 152 | 153 |
154 |
155 |
156 |
157 | 158 |
159 |

Welcome to watson transformer’s documentation!

160 | 199 |
200 | 201 | 202 |
203 | 204 |
205 |
206 | 207 | 213 | 214 | 215 |
216 | 217 |
218 |

219 | 220 | © Copyright 2020, Kai Niu 221 | 222 |

223 |
224 | 225 | 226 | 227 | Built with Sphinx using a 228 | 229 | theme 230 | 231 | provided by Read the Docs. 232 | 233 |
234 | 235 |
236 |
237 | 238 |
239 | 240 |
241 | 242 | 243 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | -------------------------------------------------------------------------------- /docs/_build/html/intro.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | What is Watson Transformer? — watson tranfromer documentation 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
47 | 48 | 105 | 106 |
107 | 108 | 109 | 115 | 116 | 117 |
118 | 119 |
120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 |
138 | 139 |
    140 | 141 |
  • »
  • 142 | 143 |
  • What is Watson Transformer?
  • 144 | 145 | 146 |
  • 147 | 148 | 149 | View page source 150 | 151 | 152 |
  • 153 | 154 |
155 | 156 | 157 |
158 |
159 |
160 |
161 | 162 |
163 |

What is Watson Transformer?

164 |

Watson Transformer solves the problem of consuming 165 | IBM Watson API services (STT, NLU, etc.) at scale by 166 | wrapping the service calls into the Spark transformer. 167 | In this way, IBM services such as STT and NLU can 168 | be built into the Spark ML pipeline, along with 169 | other transformers and estimators, to tackle the big 170 | data challenge.

171 |
172 |

The Design

173 |

As the UML chart illustrates, the Watson Transformer 174 | Class serves as a thin wrapper around the IBM Watson 175 | API class. For extensibility purposes, the logic of 176 | consuming the API service is defined in the Watson Service 177 | Class, which is an executable (callable) class. This enables any 178 | applicable API service to be wrapped into the transformer. 179 | The transformer, in turn, handles mapping input 180 | data to API calls and parsing the service response into 181 | data fields.
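To make the design concrete, here is a minimal sketch of such an executable service class. It is illustrative only and not part of the package: the class name, credentials, and fabricated response are assumptions; a real service would call the remote Watson API inside ``__call__``.

```python
import json


class DummySentimentService(object):
    """Illustrative stand-in for a Watson Service Class (hypothetical, not in the package)."""

    def __init__(self, token, endpoint):
        self.token = token        # API credential a real service would authenticate with
        self.endpoint = endpoint  # service URL a real service would call

    def __call__(self, text):
        # A real service would send `text` to the API and return its JSON response;
        # this stub fabricates a tiny response so the call flow is visible.
        score = 1.0 if "good" in (text or "") else 0.0
        return json.dumps({"sentiment": {"score": score}})


service = DummySentimentService(token="fake-token", endpoint="https://example.invalid")
print(service("a good day"))  # {"sentiment": {"score": 1.0}}
```

Because the service is just a callable, the transformer can invoke it like a plain function on each input value and hand the returned JSON to a response parser.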

182 |
183 |
184 | 185 | 186 |
187 | 188 |
189 |
190 | 191 | 199 | 200 | 201 |
202 | 203 |
204 |

205 | 206 | © Copyright 2020, Kai Niu 207 | 208 |

209 |
210 | 211 | 212 | 213 | Built with Sphinx using a 214 | 215 | theme 216 | 217 | provided by Read the Docs. 218 | 219 |
220 | 221 |
222 |
223 | 224 |
225 | 226 |
227 | 228 | 229 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | -------------------------------------------------------------------------------- /docs/_build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/_build/html/objects.inv -------------------------------------------------------------------------------- /docs/_build/html/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Search — watson tranfromer documentation 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
47 | 48 | 102 | 103 |
104 | 105 | 106 | 112 | 113 | 114 |
115 | 116 |
117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 |
135 | 136 |
    137 | 138 |
  • »
  • 139 | 140 |
  • Search
  • 141 | 142 | 143 |
  • 144 | 145 | 146 | 147 |
  • 148 | 149 |
150 | 151 | 152 |
153 |
154 |
155 |
156 | 157 | 164 | 165 | 166 |
167 | 168 |
169 | 170 |
171 | 172 |
173 |
174 | 175 | 176 |
177 | 178 |
179 |

180 | 181 | © Copyright 2020, Kai Niu 182 | 183 |

184 |
185 | 186 | 187 | 188 | Built with Sphinx using a 189 | 190 | theme 191 | 192 | provided by Read the Docs. 193 | 194 |
195 | 196 |
197 |
198 | 199 |
200 | 201 |
202 | 203 | 204 | 209 | 210 | 211 | 212 | 213 | 214 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | -------------------------------------------------------------------------------- /docs/_build/html/searchindex.js: -------------------------------------------------------------------------------- 1 | Search.setIndex({docnames:["api","development","index","intro","tutorial"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":1,sphinx:56},filenames:["api.rst","development.rst","index.rst","intro.rst","tutorial.rst"],objects:{},objnames:{},objtypes:{},terms:{"2rmb6z02rhvoo1robbiegliqwovi1orqx2z7v8":[],"case":4,"class":[2,3,4],"default":0,"function":[0,4],"import":4,"int":4,"long":4,"new":4,"return":[0,4],"true":4,For:[3,4],The:[0,2],There:4,__iter__:4,abc:[],abl:4,about:4,access:4,achiev:4,address:4,after:4,all:4,allow:4,almost:4,along:3,also:[],alwai:4,ani:3,api:[1,2,3,4],api_kei:4,appli:4,applic:3,around:[3,4],audio:4,audio_fil:4,audio_filenam:4,audio_stream:4,auth_endpoint:4,avaiabl:[0,4],avail:4,base:0,beblow:4,been:0,below:4,best:4,big:3,bodi:4,boost:0,bucket:4,bufferediobas:4,build:[3,4],call:[3,4],can:[0,3,4],challeng:3,chart:3,client:4,cloud:4,cluster:4,code:4,colulmn:[],column:[0,2],column_flatt:4,columnar:4,common:[],compar:4,compli:4,concept:4,conceptsopt:4,config:4,configur:4,consum:3,contain:[0,4],content:2,content_typ:4,contentlength:4,contrib:4,convert:4,copi:4,cos_client:4,cover:[],creat:2,custom:[0,4],data:[0,3,4],datafram:[0,4],declar:4,def:4,defaultparamsread:0,defaultparamswrit:0,defaultsttpars:4,defin:3,deliv:4,denot:4,design:2,detail:4,dev:2,develop:[2,4],devlop:4,differ:4,discuss:4,document:4,down:4,downstream:[],each:4,els:4,emot:4,emotionopt:4,enabl:[0,3,4],end:[],endpoint:4,endpoint_url:4,envrioment:1,essenti:2,estim:3,etc:3,exampl:4,execut:[3,4],exploit:[0,4],expressivli:4,extend:1,extens:[3,4],fairli:4,fals:[0,4],featur:4,field:3,file:4,file_obj:4,first:4,flat:2,flatcolumntransform:[2,4],flatten:0,follow:4,form:4,forward:4,frist:4,from:[0,4],gatewai:4,gener:4,get:4,get_object:4,give:0,given:4,good:4,greater:4,hand:3,handl:3,has:4,hasattr:4,hasinputcol:0,hasoutputcol:0,have:[0,4],hello:[],here:4,how:4,http:4,ibm:[2,3],ibm_api_key_id:4,ibm_auth_endpoint:4,ibm_boto3:4,ibm_cos_read:4,ibm_watson:4,ibn:4,ignor:4,illustr:3,implement:0,independ:4,indic:4,individu:4,inherit:0,init:[],initi:4,input:[0,3,4],inputcol:[0,4],insid:4,instal:2,instanc:[0,2],introduc:4,its:4,json:[0,2],jsontransform:[2,4],jt_stt:[],kei:4,keyword:4,keywordsopt:4,languag:4,last:4,learn:4,less:[],limit:4,list:4,logic:[3,4],make:[],mani:4,map:3,max_altern:4,max_work:[0,4],maximum:[0,4],maximum_s:4,memori:[0,4],method:[],methodtyp:4,mode:2,model:4,more:4,multipl:[0,4],name:[0,4],natur:4,natural_language_understanding_v1:4,need:4,nest:[0,4],net:4,nlu:[2,3],nlu_access_token:4,nlu_respons:4,nlu_result:4,nlu_servic:4,node:4,none:[0,4],now:[],number:[0,4],oauth:4,object:[0,4],onc:4,order:4,other:[1,3,4],out:4,output:[0,4],outputcol:[0,4],over:4,own:4,pacakg:2,packag:[0,2],page:[0,4],param1:4,param:[0,4],paramet:[0,4],paramt:4,pars:[0,3,4],parser:[0,4],pass:4,perform:[0,4],pip:[1,4],pipelin:[0,3,4],pipeline_stt:4,plain:4,pleas:4,point:4,possibl:[0,4],previou:4,problem:3,process:4,profanity_filt:4,promot:4,prompot:[],provid:4,public_endpoint_url:4,purpos:[3,4],pyarrow:[0,4],pyspark:[0,4],quick:4,read:4,reader:
4,reader_funct:4,readi:4,reason:4,reciev:0,recommend:0,record:4,refer:[2,4],regular:0,relev:4,remov:0,removeinputcol:[0,4],requir:[0,4],respons:3,resuabl:4,result:[0,4],right:4,same:4,save:4,scale:3,section:4,sentiment:4,sentimentopt:4,serv:4,servic:[0,2,3],service_nam:4,session:4,set:4,setup:1,share:0,should:4,show:4,signatur:4,signature_vers:4,sinc:4,size:4,snippet:4,solv:3,some:4,sort:4,spark:3,spawn:4,specif:4,speech:4,split_transcript_at_phrase_end:4,sql:0,stage:4,standard:4,start:4,step:4,storag:4,straight:4,straightforward:4,stream:4,stright:4,structur:4,stt:[2,3],stt_api_token:4,stt_respons:4,stt_result_pars:4,stt_servic:4,support:[1,4],sure:[],tackl:3,take:4,technic:4,text:4,than:4,thei:4,thi:[0,3,4],thin:3,thread:[0,4],thu:0,token:4,tranform:[],transcrib:4,transcript:4,transform:[0,1,2],transfrom:[1,4],translat:4,tutori:2,two:4,type:[0,4],uml:3,understand:4,url:4,us_shortform_narrowbandmodel:4,use:[0,4],used:4,using:4,util:[0,2],valu:0,value1:4,vector:[0,4],w_bnu:[],wai:[3,4],waston:[3,4],watson:[0,1],watson_transform:4,watsonplatform:4,watsonservicetransform:[2,4],watsontransform:4,wav:4,what:2,when:[0,4],whenev:[0,4],whether:[0,4],which:[0,3,4],within:4,work:4,world:[],wrap:[0,3,4],wrapper:3,write:4,your:4},titles:["API Reference","Development","Welcome to watson tranfromer\u2019s documentation!","What is Watson Transformer?","Tutorial"],titleterms:{"class":0,The:[3,4],api:0,column:4,creat:4,design:3,dev:1,develop:1,document:2,essenti:4,flat:4,flatcolumntransform:0,ibm:4,init:[],initi:[],instal:[1,4],instanc:4,json:4,jsontransform:0,mode:1,nlu:4,pacakg:1,packag:4,parser:[],refer:0,result:[],servic:4,speech:[],stt:4,text:[],tranfrom:2,transform:[3,4],tutori:4,util:4,watson:[2,3,4],watsonservicetransform:0,welcom:2,what:3}}) -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ************** 3 | 4 | This page gives the API reference of Watson Transformer Package. 5 | 6 | WatsonServiceTransformer Class 7 | =============================== 8 | 9 | **WatsonServiceTransformer** (*inputCol=None, outputCol=None, vectorization=False, max_workers=5, service=None*) 10 | 11 | **Base**: 12 | - ``pySpark.ml.pipeline.Transformer`` 13 | - ``pyspark.ml.param.shared.HasInputCol`` 14 | - ``pyspark.ml.param.shared.HasOutputCol`` 15 | - ``pyspark.ml.util.DefaultParamsReadable`` 16 | - ``pyspark.ml.util.DefaultParamsWritable`` 17 | 18 | **Parameters**: 19 | - **inputCol**: The column name use as input data. ``required`` 20 | - **outputCol**: The column name use to output the transformed data. ``required`` 21 | - **vectorization**: Exploiting pyArrow in-memory dataframe. enable vectorization whenever is possible is recommend. The default value is ``False``. 22 | - **max_workers**: When vectorization is enabled, the maximum number of threads can be utilized to boost the performance. The default value is ``5``. 23 | - **service**: The API service instance that wrapped by the Watson Transformer. 
``required`` 24 | 25 | **Return**: 26 | ``WatsonServiceTransformer`` class instance 27 | 28 | **Return Type**: 29 | ``pySpark.ml.pipeline.Transformer`` 30 | 31 | ----------------------------- 32 | 33 | **- transform** (*dataframe*) 34 | 35 | **Parameters**: 36 | - **dataframe**: the pySpark dataframe receiving the transformation 37 | 38 | **Return**: 39 | pySpark dataframe containing the transformation result 40 | 41 | **Return Type**: 42 | ``pyspark.sql.DataFrame`` 43 | 44 | 45 | .. note:: 46 | 47 | The ``WatsonServiceTransformer`` is a custom implementation of a pySpark transformer. The functions inherited from the 48 | pySpark transformer base classes have been implemented and are thus available for use. 49 | 50 | 51 | FlatColumnTransformer Class 52 | ============================ 53 | 54 | **FlatColumnTransformer** (*inputCol=None*) 55 | 56 | **Base**: 57 | - ``pySpark.ml.pipeline.Transformer`` 58 | - ``pyspark.ml.param.shared.HasInputCol`` 59 | - ``pyspark.ml.util.DefaultParamsReadable`` 60 | - ``pyspark.ml.util.DefaultParamsWritable`` 61 | 62 | **Parameters**: 63 | - **inputCol**: The column name used as input data. ``required`` 64 | 65 | **Return**: 66 | ``FlatColumnTransformer`` class instance 67 | 68 | **Return Type**: 69 | ``pySpark.ml.pipeline.Transformer`` 70 | 71 | ----------------------------- 72 | 73 | **- transform** (*dataframe*) 74 | 75 | **Parameters**: 76 | - **dataframe**: the pySpark dataframe receiving the transformation 77 | 78 | **Return**: 79 | pySpark dataframe containing the flattened data from the input column 80 | 81 | **Return Type**: 82 | ``pyspark.sql.DataFrame`` 83 | 84 | 85 | .. note:: 86 | 87 | The ``FlatColumnTransformer`` is a custom implementation of a pySpark transformer. The functions inherited from the 88 | pySpark transformer base classes have been implemented and are thus available for use. This transformer will 89 | flatten the nested input column into multiple regular data columns. 90 | 91 | 92 | JSONTransformer Class 93 | ===================== 94 | 95 | **JSONTransformer** (*inputCol=None, outputCol=None, removeInputCol=False, parser=None*) 96 | 97 | **Base**: 98 | - ``pySpark.ml.pipeline.Transformer`` 99 | - ``pyspark.ml.param.shared.HasInputCol`` 100 | - ``pyspark.ml.param.shared.HasOutputCol`` 101 | - ``pyspark.ml.util.DefaultParamsReadable`` 102 | - ``pyspark.ml.util.DefaultParamsWritable`` 103 | 104 | **Parameters**: 105 | - **inputCol**: The column name used as input data. ``required`` 106 | - **outputCol**: The column name used to output the transformed data. ``required`` 107 | - **removeInputCol**: Whether or not to remove the input column from the output dataframe. The default value is ``False``. 108 | - **parser**: The object that parses JSON data into data column(s). ``required`` 109 | 110 | **Return**: 111 | ``JSONTransformer`` class instance 112 | 113 | **Return Type**: 114 | ``pySpark.ml.pipeline.Transformer`` 115 | 116 | ----------------------------- 117 | 118 | **- transform** (*dataframe*) 119 | 120 | **Parameters**: 121 | - **dataframe**: the pySpark dataframe receiving the transformation 122 | 123 | **Return**: 124 | pySpark dataframe containing the transformation result 125 | 126 | **Return Type**: 127 | ``pyspark.sql.DataFrame`` 128 | 129 | 130 | .. note:: 131 | 132 | The ``JSONTransformer`` is a custom implementation of a pySpark transformer. The functions inherited from the 133 | pySpark transformer base classes have been implemented and are thus available for use.
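The following usage sketch shows how these transformers are typically composed; it is not taken from the package documentation. The column names, sample JSON, and Spark session setup are assumptions for illustration, and in a full pipeline a ``WatsonServiceTransformer`` stage wrapping an STT/NLU service instance would normally produce the JSON response column first.

```python
# Hedged usage sketch: JSONTransformer parses a JSON response column with a
# contrib parser, then FlatColumnTransformer flattens the resulting struct column.
import json
from pyspark.sql import SparkSession
from pyspark.ml import Pipeline
from watson_transformer import JSONTransformer, FlatColumnTransformer
from watson_transformer.contrib.nlu import DefaultNLUParser

spark = SparkSession.builder.master("local[1]").getOrCreate()

# made-up NLU-style response; a real one would come from the NLU service
sample = json.dumps({"keywords": [{"text": "watson", "relevance": 0.9}],
                     "sentiment": {"document": {"score": 0.7, "label": "positive"}}})
df_in = spark.createDataFrame([("call_001", sample)], ["call_id", "nlu_response"])

parse_stage = JSONTransformer(inputCol="nlu_response",
                              outputCol="nlu_features",
                              removeInputCol=True,
                              parser=DefaultNLUParser(keywords_limit=3, concepts_limit=3))
flatten_stage = FlatColumnTransformer(inputCol="nlu_features")

pipeline = Pipeline(stages=[parse_stage, flatten_stage])
pipeline.fit(df_in).transform(df_in).show()
```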
134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'watson tranfromer' 21 | copyright = '2020, Kai Niu' 22 | author = 'Kai Niu' 23 | 24 | import sphinx_rtd_theme 25 | 26 | 27 | # -- General configuration --------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = ["sphinx_rtd_theme", 33 | ] 34 | 35 | # Add any paths that contain templates here, relative to this directory. 36 | templates_path = ['_templates'] 37 | 38 | # List of patterns, relative to source directory, that match files and 39 | # directories to ignore when looking for source files. 40 | # This pattern also affects html_static_path and html_extra_path. 41 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 42 | 43 | 44 | # -- Options for HTML output ------------------------------------------------- 45 | 46 | # The theme to use for HTML and HTML Help pages. See the documentation for 47 | # a list of builtin themes. 48 | # 49 | html_theme = "sphinx_rtd_theme" 50 | html_logo = 'misc/watson_transformer_logo.svg' 51 | html_theme_options = { 52 | 'canonical_url': '', 53 | 'logo_only': True, 54 | 'display_version': True, 55 | 'prev_next_buttons_location': 'bottom', 56 | 'style_external_links': False, 57 | 'vcs_pageview_mode': '', 58 | 'style_nav_header_background': '#2980b9', 59 | # Toc options 60 | 'collapse_navigation': True, 61 | 'sticky_navigation': True, 62 | 'navigation_depth': 4, 63 | 'includehidden': True, 64 | 'titles_only': False 65 | } 66 | 67 | # Add any paths that contain custom static files (such as style sheets) here, 68 | # relative to this directory. They are copied after the builtin static files, 69 | # so a file named "default.css" will overwrite the builtin "default.css". 70 | html_static_path = ['_static'] 71 | 72 | # master document serve as landing page 73 | master_doc = 'index' -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | Development 2 | ============= 3 | 4 | To setup development envrioment and extend support Watson Transformer API to other API .... 5 | 6 | 7 | Install Pacakge in Dev Mode 8 | --------------------------- 9 | 10 | :: 11 | 12 | pip install watson-transfromer -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
watson tranfromer documentation master file, created by 2 | sphinx-quickstart on Mon Jul 27 22:03:12 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to watson tranfromer's documentation! 7 | ============================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 3 11 | :caption: Contents: 12 | 13 | intro 14 | tutorial 15 | api 16 | development 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/intro.rst: -------------------------------------------------------------------------------- 1 | What is Watson Transformer? 2 | =========================== 3 | Watson Transformer solves the problem of consuming 4 | IBM Watson API services(STT, NLU, etc.) at scale by 5 | wrapping the service calls into the Spark transformer. 6 | In this way, The IBM services such as STT and NLU can 7 | build into the Spark ML pipeline, along with 8 | other transformers and estimators to tackle the big 9 | data challenge. 10 | 11 | The Design 12 | ---------- 13 | As the UML chart illustrates, The Watson Transformer 14 | Class service as a thin wrapper around the IBM Waston 15 | API class. For extensibility purposes, the logic of 16 | consuming API service is defined in the Watson Service 17 | Class, which is an executable class. It enables any 18 | applicable API service to be wrapped into the transformer. 19 | On the other hand, the transformer handles mapping input 20 | data to API calls and parse the service response to 21 | data fields. -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/misc/Watson_Tranformer_Design.drawio: -------------------------------------------------------------------------------- 1 | 7Vttb+I4EP41SHsr7SmJCS8fC23vVuqqVbvS7n1Chhhw69iRYwrsr7+x4xBCEpqlvFSrVEiNJ3YSP888w3gcWmgYrv6ROJp/EwFhLc8JVi103fI81+114J+2rBNLt91NDDNJA9spMzzRX8QaHWtd0IDEuY5KCKZolDdOBOdkonI2LKVY5rtNBcvfNcIzUjA8TTArWn/QQM0Ta893Mvu/hM7m6Z1dx54JcdrZGuI5DsRyy4RuWmgohVDJUbgaEqbBS3FJxt1WnN08mCRc1RngvMrn+6h/j8ad4eTOG97Gz+pL208u84rZws7YPq1apxDMVcjgyG2hQaykeCFDwYQ055Bj/uCMvQyRiqwqH9DdTBv8hYiQKLmGLnZAp2eRsq7Ssc1lhrvbtbb5NubIGrHlera5dAYHHFhEfgcd9HHR8VAJPO2zwoN6BTRIAOqxTSHVXMwEx+wmsw6kWPCA6MtqYLI+d0JEFsdnotTahgK8UAJMWyiTFVU/t47/05f627et65W9smms0waH+f40HR2nnRqSkY6/MWSDTSs3+oFICrARaY3J1PV89xMK8IiFnJA9OKYRQmE5I2pfx265i0jCsKKv+Sc5vhrcohqGXusKoCaThcJj4NdzxPhZx2E4MQCMOgzmMxhLOJopg5q1pIZHohaSw7jv60gPv8UTJfTDJv3gQcfZ2EtJr5+XXrdEeX6J8vyTxSXvJEzEkeCxHvmAZUzkh+bAa5eQ4PrnJKFdIOHq4SsYtoA05g4OIbINLH7ONVZYOzolLIgLeAJCKh/v8rBywXUUnVLGdkyY0RmH5gQQ1VFqoPGmkMlc2RMhDQITgpdzqshThE1QWkLeVgjLR/m+6ucIQ06vSJh3VtV0CoR95bHCXFGsiCEuH4vuEwH9uRTtaMpFJRSdNbC5xXT0B1axMKRIzOOpkKGOTM6Q4bionnhJQ4YT+AVXaSbi1sFe6fxjMJlTFtzhtVjouYJ3TF7S1mAuJP0Fl8UZ71gqm6h4nVyPJz3SUiVJDH0eUl7cHdM3vMp1vMOxsoaJYAxHMR1vphFCjkD5QCglwuN5AnrzC84rTS07exzB3uwRJIT5zHwj2bt5O6ms65bE8l7J/bxO/naYAZUcxDvQ0ogL7reZ6Ds8shg0Wp4O45RHCwX6bqEr44OS8llVqPjd+MDIVFV6aAxRAe51Z/pctzPLowVKmwSMnTKz3JxDTCHcxA+Fk9wg8ZtIUK4Mkv4APoD3UOfRPjz4ENpu1oaP7i5hvhAwJabGqwj46ZLEqq4L7lF80THXecbf8sRdzzheROqW8L/DMqOGvYTltErgHkTx5gvAcvrdLIq+uAXeUZF3VMIxw2PCHkRMFRX6+jLpu8P9xej1vXr09k7Fbq9C3RDwy+QNXobcLOY2Sj+eK5RUe86r9FTYjdJPQW+vZiA/ldLTlUZB6bDQfaU6zdY63ySbT9ZalWg2cn+nP/Tbl5Z7WWLXyP1I9G5W9hfTe9nGhta7MstIKcJPAQA6lTgkfyXi37QbtR/fHYqVhVJ3QN6J/KHdL7D64fcqWrl9Cr+1f5cimd9RNiSQDc5vbkgkG2SX2pBIy1ANp3U49epyWiHl83DqFdfbtZOypvp3YPXPP6z8B9ZqL/gzyn9eVYFAQSbAm9rfu5KGqu3sj1L78/rNEuF09F669oeqKgKEBwaiRt2npP/i9T5U8jpLo+5j0Xvpel+a7hbULc0m/0jpLX4jcLF3l78R+KEecPEKH6rx9mYj8EPpvXiFDxXfv0oVnrx/NYqS19galZ/UDS5d2UNVld7Pn2FyOIwTB3h5NbOlsuZmzvb7i40n1PGEuhnd6Tyh6l2d0QioYaPRpyQK2Hr/dibQuMRJXKJfc413gEtAM/vtTFLuyX6BhG7+Bw== -------------------------------------------------------------------------------- /docs/misc/Watson_Tranformer_Design.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/Watson_Tranformer_Design.jpg -------------------------------------------------------------------------------- /docs/misc/Watson_Tranformer_Design.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
«executable object»
Return Type Factory
«executable object»...
«executable object»
Response Parser
«executable object»...
API Response => Data Fields
API Response => Data...
Instantiate Return Type Object
Instantiate Return Type O...
Watson Transformer Class+ inputCol: string+ outputCol: string+ service: Watson Service Class+ transfrom(dataframe): dataframeWatson Service Class+ token: string+ endpoint: string+ return_type: object+ response_parser: object+ **params: kv pairs+ __call__(object): return_type
Viewer does not support full SVG 1.1
-------------------------------------------------------------------------------- /docs/misc/pipleline_benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/pipleline_benchmark.png -------------------------------------------------------------------------------- /docs/misc/regular_udf_vs_vectorized_udf_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/regular_udf_vs_vectorized_udf_.png -------------------------------------------------------------------------------- /docs/misc/watson_transformer_perf_full_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/watson_transformer_perf_full_pipeline.png -------------------------------------------------------------------------------- /docs/misc/watson_transformer_stt_perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/docs/misc/watson_transformer_stt_perf.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ibm-watson ~= 4.4.0 2 | botocore ~= 1.16.11 3 | ibm-cos-sdk ~= 2.7.0 4 | ibm-cos-sdk-core ~= 2.7.0 5 | ibm-cos-sdk-s3transfer ~= 2.7.0 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | " 4 | " publish package to pypl index 5 | " docu: https://realpython.com/pypi-publish-python-package/ 6 | " docu: https://packaging.python.org/tutorials/packaging-projects/ 7 | " 8 | """ 9 | from setuptools import setup, Command, find_packages 10 | import os 11 | 12 | with open("README.md", "r") as fh: 13 | long_description = fh.read() 14 | 15 | """ 16 | " 17 | " clean up build files. e.g. 
python setup.py clean 18 | " docu: https://stackoverflow.com/questions/3779915/why-does-python-setup-py-sdist-create-unwanted-project-egg-info-in-project-r 19 | " 20 | """ 21 | class CleanCommand(Command): 22 | """Custom clean command to tidy up the project root.""" 23 | user_options = [] 24 | def initialize_options(self): 25 | pass 26 | def finalize_options(self): 27 | pass 28 | def run(self): 29 | os.system('rm -vrf ./build ./dist ./src/*.egg-info ./*.pyc ./*.tgz ./*.egg-info') 30 | 31 | setup( 32 | name='watson-transformer', 33 | version='0.0.17', 34 | license='BSD 2-Clause License', 35 | author='Kai Niu', 36 | author_email='kai.niu@ibm.com', 37 | description='wrap Watson API into pyspark transformers', 38 | long_description='wrap Watson API into pyspark transformers', 39 | long_description_content_type="text/markdown", 40 | url=" ", 41 | packages= find_packages(where='./src'), 42 | package_dir={ 43 | '': 'src', 44 | }, 45 | keywords=[ 46 | 'pyspark', 'data science', 'pipeline' 47 | ], 48 | zip_safe=True, 49 | classifiers=[ 50 | 'Programming Language :: Python :: 3', 51 | 'License :: OSI Approved :: MIT License', 52 | 'Operating System :: OS Independent', 53 | 'Programming Language :: Python', 54 | 'Programming Language :: Python :: 3', 55 | 'Programming Language :: Python :: 3.4', 56 | 'Programming Language :: Python :: 3.5', 57 | 'Programming Language :: Python :: 3.6', 58 | 'Programming Language :: Python :: 3.7', 59 | 'Programming Language :: Python :: Implementation :: CPython', 60 | 'Programming Language :: Python :: Implementation :: PyPy', 61 | 'Topic :: Utilities' 62 | ], 63 | python_requires='>=3.4', 64 | install_requires=[ 65 | # eg: 'aspectlib==1.1.1', 'six>=1.7', 66 | 'ibm-watson == 5.2.0' 67 | ], 68 | extras_require={ 69 | 'dev' : [''], 70 | 'test' : ['pytest', 'pytest-cov','mock'] 71 | }, 72 | cmdclass={ 73 | 'clean': CleanCommand, 74 | } 75 | ) 76 | 77 | 78 | """" 79 | To build package: 80 | 1. move to project root directory 81 | 2. python3 setup.py sdist bdist_wheel 82 | 3. check dist/ folder 83 | 4. python3 -m pip install --user --upgrade twine (optional) 84 | 5. python3 -m twine upload dist/* 85 | 6. 
python3 -m setup.py clean 86 | """ 87 | -------------------------------------------------------------------------------- /src/watson_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from watson_transformer.watson_service_transformer import WatsonServiceTransformer 2 | from watson_transformer.flat_column_transformer import FlatColumnTransformer 3 | from watson_transformer.json_transformer import JSONTransformer -------------------------------------------------------------------------------- /src/watson_transformer/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kai-niu/watson-transformer/e24316e4d01cb6caba16f2d7fadd4ef11edbeca6/src/watson_transformer/contrib/__init__.py -------------------------------------------------------------------------------- /src/watson_transformer/contrib/nlu/__init__.py: -------------------------------------------------------------------------------- 1 | from watson_transformer.contrib.nlu.default_nlu_parser import DefaultNLUParser -------------------------------------------------------------------------------- /src/watson_transformer/contrib/nlu/default_nlu_parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numbers 3 | from watson_transformer.contrib.response_base import ResponseBase 4 | from pyspark.sql.types import StringType, FloatType, StructType, StructField, Row 5 | from pyspark.sql import functions as F 6 | 7 | 8 | """ 9 | " 10 | " default STT output interpreter which assume one alternative transcript and no speaker detection 11 | " 12 | """ 13 | 14 | class DefaultNLUParser(ResponseBase): 15 | 16 | def __init__(self, keywords_limit, concepts_limit): 17 | """ 18 | @param::keywords_limit: the max number of keywords extracted 19 | @param::concepts_limit: the max number of concepts extracted 20 | @return: none 21 | """ 22 | super(DefaultNLUParser, self).__init__() 23 | if not isinstance(keywords_limit, numbers.Number): 24 | raise ValueError('> DefaultNLUParser: keywords_limit must be numeric.') 25 | if not isinstance(concepts_limit, numbers.Number): 26 | raise ValueError('> DefaultNLUParser: concepts_limit must be numeric.') 27 | if keywords_limit <= 0: 28 | raise ValueError('> DefaultNLUParser: keywords_limit must be greater than 0.') 29 | if concepts_limit <= 0: 30 | raise ValueError('> DefaultNLUParser: concepts_limit must be greater than 0.') 31 | self.keywords_limit = keywords_limit 32 | self.concepts_limit = concepts_limit 33 | 34 | """ 35 | " 36 | " default NLU output formatter which parse keywords, concepts, sentiment and emotion 37 | " 38 | """ 39 | def __call__(self, json_dumps): 40 | """ 41 | @param::output: the output json object from STT 42 | @return:the transcript join by period in string format 43 | """ 44 | data = {} 45 | valid_json_dumps = True 46 | try: 47 | json_data = json.loads(json_dumps) 48 | except: 49 | valid_json_dumps = False 50 | 51 | if valid_json_dumps and json_data: 52 | # extract keyword data 53 | if "keywords" in json_data: 54 | for i in range(self.keywords_limit): 55 | if i < len(json_data["keywords"]): 56 | kw = json_data["keywords"][i] 57 | data['keyword_%d'%(i)] = kw['text'] 58 | data['keyword_%d_score'%(i)] = kw['relevance'] 59 | else: # when less keywords extracted than limit 60 | data['keyword_%d'%(i)] = None 61 | data['keyword_%d_score'%(i)] = None 62 | else: 63 | for i in range(self.keywords_limit): 64 | 
data['keyword_%d'%(i)] = None 65 | data['keyword_%d_score'%(i)] = None 66 | 67 | # extract concept 68 | if "concepts" in json_data: 69 | for i in range(self.concepts_limit): 70 | if i < len(json_data["concepts"]): 71 | concept = json_data["concepts"][i] 72 | data['concept_%d'%(i)] = concept['text'] 73 | data['concept_%d_score'%(i)] = concept['relevance'] 74 | else: # when less concept extracted than limit 75 | data['concept_%d'%(i)] = None 76 | data['concept_%d_score'%(i)] = None 77 | else: 78 | for i in range(self.concepts_limit): 79 | data['concept_%d'%(i)] = None 80 | data['concept_%d_score'%(i)] = None 81 | 82 | # extract sentiment 83 | if "sentiment" in json_data: 84 | data["sentiment_score"] = json_data["sentiment"]["document"]["score"] 85 | data["sentiment_label"] = json_data["sentiment"]["document"]["label"] 86 | else: 87 | data["sentiment_score"] = None 88 | data["sentiment_label"] = None 89 | 90 | # extract "emotion" 91 | if "emotion" in json_data: 92 | data["sadness_score"] = json_data["emotion"]["document"]["emotion"]["sadness"] 93 | data["joy_score"] = json_data["emotion"]["document"]["emotion"]["joy"] 94 | data["fear_score"] = json_data["emotion"]["document"]["emotion"]["fear"] 95 | data["disgust_score"] = json_data["emotion"]["document"]["emotion"]["disgust"] 96 | data["anger_score"] = json_data["emotion"]["document"]["emotion"]["anger"] 97 | else: 98 | data["sadness_score"] = None 99 | data["joy_score"] = None 100 | data["fear_score"] = None 101 | data["disgust_score"] = None 102 | data["anger_score"] = None 103 | 104 | # case 1: json dumps is invalid 105 | # case 2: json dumps is valid but json data is invalid for the parser 106 | if not data: 107 | # extract keyword data 108 | for i in range(self.keywords_limit): 109 | data['keyword_%d'%(i)] = None 110 | data['keyword_%d_score'%(i)] = None 111 | # extract concept 112 | for i in range(self.concepts_limit): 113 | data['concept_%d'%(i)] = None 114 | data['concept_%d_score'%(i)] = None 115 | # extract sentiment 116 | data["sentiment_score"] = None 117 | data["sentiment_label"] = None 118 | # extract "emotion" 119 | data["sadness_score"] = None 120 | data["joy_score"] = None 121 | data["fear_score"] = None 122 | data["disgust_score"] = None 123 | data["anger_score"] = None 124 | 125 | return Row(**data) 126 | 127 | """ 128 | " 129 | " return the default NLU return type 130 | " 131 | """ 132 | def get_return_type(self): 133 | """ 134 | @param::num_keywords: the number of keywords extracted by NLU 135 | @param::num_concpets: the number of concepts extracted by NLU 136 | @return: the defined return type and Pandas UDF data type 137 | """ 138 | fields = [] 139 | # populate keyword fields 140 | for i in range(self.keywords_limit): 141 | fields.append(StructField("keyword_%d"%(i), StringType(), True)) 142 | fields.append(StructField("keyword_%d_score"%(i), FloatType(), True)) 143 | 144 | # populate concpet fields 145 | for i in range(self.concepts_limit): 146 | fields.append(StructField("concept_%d"%(i), StringType(), True)) 147 | fields.append(StructField("concept_%d_score"%(i), FloatType(), True)) 148 | 149 | # populate other fields 150 | fields.extend([StructField("sentiment_score", FloatType(), True), 151 | StructField("sentiment_label", StringType(), True), 152 | StructField("sadness_score", FloatType(), True), 153 | StructField("joy_score", FloatType(), True), 154 | StructField("fear_score", FloatType(), True), 155 | StructField("disgust_score", FloatType(), True), 156 | StructField("anger_score", FloatType(), True)]) 157 | 
158 | return StructType(fields) 159 | -------------------------------------------------------------------------------- /src/watson_transformer/contrib/readers.py: -------------------------------------------------------------------------------- 1 | import types 2 | from botocore.client import Config 3 | import ibm_boto3 4 | def __iter__(self): return 0 5 | 6 | """ 7 | " 8 | " IBM COS reader 9 | " boto3 is not thread safe(https://github.com/boto/botocore/issues/1246) 10 | " create new session in each thread(ref: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html) 11 | " 12 | """ 13 | def ibm_cos_reader(audio_file, bucket, token, endpoint): 14 | """ 15 | @param::audio_file: the audio file uid 16 | @param::bukcet: the bucket name in which the audio file is stored 17 | @param::token: the API access token for IBM COS service 18 | @param::endpoint: the URL to access IBM COS service 19 | @return: the audio stream 20 | """ 21 | session = ibm_boto3.session.Session() 22 | cos_client = session.client(service_name='s3', 23 | ibm_api_key_id=token, 24 | ibm_auth_endpoint="https://iam.ng.bluemix.net/oidc/token", 25 | config=Config(signature_version='oauth'), 26 | endpoint_url=endpoint) 27 | audio_stream = cos_client.get_object(Bucket=bucket, Key=audio_file)['Body'] 28 | if not hasattr(audio_stream, "__iter__"): audio_stream.__iter__ = types.MethodType( __iter__, audio_stream) 29 | return audio_stream 30 | 31 | 32 | """ 33 | " 34 | " Local file reader 35 | " 36 | """ 37 | def local_fs_reader(audio_file): 38 | """ 39 | @param::audio_file: the full path including filename to the audio file 40 | @return: the filestream of audio file 41 | """ 42 | return open(audio_file, "rb") -------------------------------------------------------------------------------- /src/watson_transformer/contrib/response_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " response base class 4 | " 5 | """ 6 | class ResponseBase(): 7 | 8 | def __call__(self, json_data): 9 | raise NotImplementedError('> ResponseBase class: __call__ method is not implemented.') 10 | 11 | def get_return_type(self): 12 | raise NotImplementedError('> ResponseBase class: get_return_type method is not implemented.') 13 | -------------------------------------------------------------------------------- /src/watson_transformer/contrib/stt/__init__.py: -------------------------------------------------------------------------------- 1 | from watson_transformer.contrib.stt.default_stt_parser import DefaultSTTParser -------------------------------------------------------------------------------- /src/watson_transformer/contrib/stt/default_stt_parser.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | from watson_transformer.contrib.response_base import ResponseBase 4 | from pyspark.sql.types import StringType, FloatType, StructType, StructField, Row 5 | from pyspark.sql import functions as F 6 | 7 | """ 8 | " 9 | " default STT output interpreter which assume one alternative transcript and no speaker detection 10 | " 11 | """ 12 | 13 | class DefaultSTTParser(ResponseBase): 14 | 15 | def __init__(self): 16 | super(DefaultSTTParser, self).__init__() 17 | 18 | 19 | """ 20 | " 21 | " pass the json response to datatype 22 | " 23 | """ 24 | def __call__(self, json_response): 25 | """ 26 | @param::output: the output json object from STT 27 | @return:the transcript join by period in string format 28 | """ 29 | if json_response: 
30 | try: 31 | response = json.loads(json_response) 32 | if 'results' in response: 33 | transcripts = [doc['alternatives'][0]['transcript'].strip() for doc in response['results']] 34 | return '. '.join(transcripts) + '.' 35 | else: 36 | # if the 'results' attribute is not present in the response, 37 | # the recording most likely starts with more than 30 seconds of silence 38 | return None 39 | except Exception: # malformed JSON or unexpected response structure 40 | return None 41 | else: 42 | return None 43 | 44 | """ 45 | " 46 | " return the default STT return type 47 | " 48 | """ 49 | def get_return_type(self): 50 | """ 51 | @return: the defined return type 52 | """ 53 | return StringType() -------------------------------------------------------------------------------- /src/watson_transformer/flat_column_transformer.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " explode the struct column to multiple simple columns 4 | " @ref: https://stackoverflow.com/questions/47669895/how-to-add-multiple-columns-using-udf?rq=1 5 | " 6 | """ 7 | 8 | from pyspark import keyword_only 9 | from pyspark.sql import functions as F 10 | from pyspark.sql.types import StringType, FloatType, StructType, StructField, Row 11 | from pyspark.sql import DataFrame 12 | from pyspark.ml.pipeline import Transformer 13 | from pyspark.ml.param.shared import HasInputCol, HasOutputCol, Param 14 | from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable 15 | 16 | 17 | 18 | class FlatColumnTransformer(Transformer, 19 | HasInputCol, 20 | DefaultParamsReadable, 21 | DefaultParamsWritable): 22 | 23 | """ 24 | " 25 | " set init transformer and set parameters 26 | " 27 | """ 28 | @keyword_only 29 | def __init__(self, 30 | inputCol=None): 31 | """ 32 | @param::inputCol: the name of the struct column to explode into flat columns 33 | @return: none 34 | """ 35 | super(FlatColumnTransformer, self).__init__() 36 | kwargs = self._input_kwargs 37 | self._set(**kwargs) 38 | 39 | """ 40 | " 41 | " set parameters, called from self._set(), inherited method. 
42 | " 43 | """ 44 | @keyword_only 45 | def setParams(self, inputCol=None): 46 | """ 47 | @param::inputCol: the input column name contains sound file name 48 | @return: none 49 | """ 50 | kwargs = self._input_kwargs 51 | return self._set(**kwargs) 52 | 53 | """ 54 | " 55 | " set input columns name 56 | " 57 | """ 58 | def setInputCol(self, value): 59 | """ 60 | @param::value: the input columns name contains the sound file name 61 | @return: None 62 | """ 63 | return self._set(inputCol=value) 64 | 65 | 66 | """ 67 | " 68 | " perform the transform using provided IBM service api 69 | " 70 | """ 71 | def _transform(self, df:DataFrame) -> DataFrame: 72 | """ 73 | @param::df: the pyspark dataframe 74 | @return: the transformed dataframe 75 | """ 76 | cols = df.columns 77 | if self.getInputCol() in cols: 78 | cols.remove(self.getInputCol()) 79 | return df.withColumn('__explode_col_output__',F.explode(F.array(F.col(self.getInputCol())))) \ 80 | .select(*cols, "__explode_col_output__.*") 81 | else: 82 | raise ValueError("> FlatColumnTransformer class: inputCol is not in the dataframe.") -------------------------------------------------------------------------------- /src/watson_transformer/json_transformer.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " pyspark transformer covert JSON data to data column(s) 4 | " @ref: https://stackoverflow.com/questions/41399399/serialize-a-custom-transformer-using-python-to-be-used-within-a-pyspark-ml-pipel/52467470#52467470 5 | " @ref: https://stackoverflow.com/questions/32331848/create-a-custom-transformer-in-pyspark-ml 6 | " 7 | """ 8 | import pandas as pd 9 | from concurrent.futures import ThreadPoolExecutor 10 | from pyspark import keyword_only 11 | from pyspark.sql import functions as F 12 | from pyspark.sql.types import StringType, FloatType, StructType, StructField, Row 13 | from pyspark.sql import DataFrame 14 | from pyspark.ml.pipeline import Transformer 15 | from pyspark.ml.param.shared import HasInputCol, HasOutputCol, Param 16 | from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable 17 | 18 | 19 | class JSONTransformer(Transformer, 20 | HasInputCol, 21 | HasOutputCol, 22 | DefaultParamsReadable, 23 | DefaultParamsWritable): 24 | 25 | """ 26 | " 27 | " set init transformer and set parameters 28 | " 29 | """ 30 | @keyword_only 31 | def __init__(self, 32 | inputCol=None, 33 | outputCol=None, 34 | removeInputCol=False, 35 | parser=None): 36 | """ 37 | @param::inputCol: the input column name contains sound file name 38 | @param::outputCol: the output column name 39 | @param::removeInputCol: flag indicate whether remove input columns 40 | @param::parser: parser object parse JSON data to data column(s) 41 | @return: none 42 | """ 43 | 44 | super(JSONTransformer, self).__init__() 45 | self.parser = Param(self, "parser", None) 46 | self._setDefault(parser=None) 47 | self.removeInputCol = Param(self, "removeInputCol", False) 48 | self._setDefault(removeInputCol=False) 49 | kwargs = self._input_kwargs 50 | self._set(**kwargs) 51 | 52 | # make sure parameter: token, endpoint set properly. 53 | if not callable(parser): 54 | raise ValueError('> The parser instance provided must be callable object.') 55 | if not inputCol or not inputCol.strip(): 56 | raise ValueError('> The input column name is required.') 57 | if not outputCol or not outputCol.strip(): 58 | raise ValueError('> The output column name is required.') 59 | 60 | 61 | 62 | # make sure parameter: token, endpoint set properly. 
63 | if parser == None: 64 | raise ValueError('> JSONTransformer Class: JSON data parser object must be provided.') 65 | 66 | """ 67 | " 68 | " set parameters, called from self._set(), inherited method. 69 | " 70 | """ 71 | @keyword_only 72 | def setParams(self): 73 | """ 74 | @param:: None 75 | @return: none 76 | """ 77 | kwargs = self._input_kwargs 78 | return self._set(**kwargs) 79 | 80 | """ 81 | " 82 | " set whether or not remove input column from dataframe 83 | " 84 | """ 85 | def setRemoveInputCol(self, value): 86 | """ 87 | @param::value: boolean value indcating enable vectorized udf 88 | @return: None 89 | """ 90 | return self._set(removeInputCol=value) 91 | 92 | """ 93 | " 94 | " get enable state of vectorized udf 95 | " 96 | """ 97 | def getRemoveInputCol(self): 98 | """ 99 | @param:: None 100 | @return: vectorization flag 101 | """ 102 | return self.getOrDefault(self.removeInputCol) 103 | 104 | """ 105 | " 106 | " set the parser object 107 | " 108 | """ 109 | def setParser(self, value): 110 | """ 111 | @param::value: the IBM service API object 112 | @return: None 113 | """ 114 | return self._set(parser=value) 115 | 116 | """ 117 | " 118 | " get the parser object 119 | " 120 | """ 121 | def getParser(self): 122 | """ 123 | @param:None 124 | @return: the configured service object 125 | """ 126 | return self.getOrDefault(self.parser) 127 | 128 | """ 129 | " 130 | " set input columns name 131 | " 132 | """ 133 | def setInputCol(self, value): 134 | """ 135 | @param::value: the input columns name contains the sound file name 136 | @return: None 137 | """ 138 | return self._set(inputCol=value) 139 | 140 | """ 141 | " 142 | " set output columns name 143 | " 144 | """ 145 | def setOutputCol(self, value): 146 | """ 147 | @param::value: the output column name contains the output result 148 | @return: None 149 | """ 150 | return self._set(outputCol=value) 151 | 152 | 153 | """ 154 | " 155 | " perform the transform 156 | " 157 | """ 158 | def _transform(self, df:DataFrame) -> DataFrame: 159 | """ 160 | @param::df: the pyspark dataframe 161 | @return: the transformed dataframe 162 | """ 163 | # get the new service instance 164 | parser = self.getParser() 165 | outputCol = self.getOutputCol() 166 | inputCol = self.getInputCol() 167 | removeInputCol = self.getRemoveInputCol() 168 | return_type = parser.get_return_type() 169 | # CPU bounded task, not going to benefit from vectorized UDF very much 170 | parser_udf = F.udf(parser, return_type).asNondeterministic() 171 | df = df.withColumn(outputCol, parser_udf(F.col(inputCol))) 172 | # drop input column based on flag 173 | if removeInputCol: 174 | df = df.drop(self.getInputCol()) 175 | return df -------------------------------------------------------------------------------- /src/watson_transformer/service/__init__.py: -------------------------------------------------------------------------------- 1 | from watson_transformer.service.nlu import NLU 2 | from watson_transformer.service.stt import STT -------------------------------------------------------------------------------- /src/watson_transformer/service/nlu.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " IBM NLU Service Executable Class 4 | " docu: https://cloud.ibm.com/apidocs/natural-language-understanding?code=python#analyze-text 5 | " 6 | """ 7 | 8 | import json 9 | from pyspark import keyword_only 10 | from ibm_watson import NaturalLanguageUnderstandingV1 11 | from ibm_cloud_sdk_core.authenticators import IAMAuthenticator 12 | 
from ibm_cloud_sdk_core import ApiException 13 | from ibm_watson.natural_language_understanding_v1 import Features, KeywordsOptions, ConceptsOptions, SentimentOptions, EmotionOptions 14 | from pyspark.sql.types import StringType 15 | from watson_transformer.service.service_base import ServiceBase 16 | 17 | class NLU(ServiceBase): 18 | 19 | @keyword_only 20 | def __init__(self, token, endpoint, strict_mode=True, **params): 21 | """ 22 | @param::token: the IBM NLU API access token 23 | @param::endpoint: the endpoint url for the NLU API 24 | @param::params: the kv params passing to underlying NaturalLanguageUnderstandingV1 constructor 25 | @return: the output parsed by parser object 26 | """ 27 | super(NLU, self).__init__(strict_mode) 28 | self.token = token 29 | self.endpoint = endpoint 30 | self.params = params 31 | 32 | def __call__(self, text): 33 | """ 34 | @param::text: the text to perform NLU 35 | @return: the output formatted by formatter object 36 | """ 37 | 38 | if text: 39 | # init nlu client 40 | authenticator = IAMAuthenticator(self.token) 41 | nlu = NaturalLanguageUnderstandingV1( version='2019-07-12',authenticator=authenticator) 42 | nlu.set_service_url(self.endpoint) 43 | 44 | try: 45 | response = nlu.analyze(text = text, **self.params).get_result() 46 | except ApiException: 47 | response = None # better to log such execeptions separately 48 | except Exception: 49 | if self.strict_mode: 50 | raise RuntimeError("*** runtime error caused by input: '%s'"%(text)) 51 | else: 52 | response = None 53 | 54 | return json.dumps(response) if response else None 55 | else: 56 | return None 57 | 58 | def get_return_type(self): 59 | return StringType() 60 | 61 | def get_new_client(self): 62 | return NLU(token = self.token, 63 | endpoint = self.endpoint, 64 | strict_mode = self.strict_mode, 65 | **self.params) -------------------------------------------------------------------------------- /src/watson_transformer/service/service_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " define the contract/interfact 4 | " 5 | """ 6 | 7 | class ServiceBase(): 8 | def __init__(self, strict_mode=True): 9 | self.strict_mode = strict_mode 10 | 11 | def __call__(self, data): 12 | raise NotImplementedError('> service class __call__ method is not implemented.') 13 | 14 | def get_return_type(self, data): 15 | raise NotImplementedError('> service class get_return_type method is not implemented.') 16 | 17 | def get_new_client(self): 18 | raise NotImplementedError('> service class get_new_client method is not implemented.') 19 | 20 | -------------------------------------------------------------------------------- /src/watson_transformer/service/stt.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " IBM STT Service Executable Class 4 | " docu: https://cloud.ibm.com/apidocs/speech-to-text 5 | " 6 | """ 7 | 8 | import json 9 | from pyspark import keyword_only 10 | from ibm_watson import SpeechToTextV1 11 | from ibm_cloud_sdk_core.authenticators import IAMAuthenticator 12 | from ibm_cloud_sdk_core.api_exception import ApiException 13 | from pyspark.sql.types import StringType 14 | from watson_transformer.service.service_base import ServiceBase 15 | 16 | class STT(ServiceBase): 17 | 18 | @keyword_only 19 | def __init__(self, token, endpoint, reader, strict_mode=True, **params): 20 | """ 21 | @param::token: the IBM STT API access token 22 | @param::endpoint: the endpoint url for the STT API 23 | 
@param::reader: the object read audio stream using audio file name/id 24 | @param::params: the kv params passing to underlying SpeechToTextV1 constructor 25 | @return: the output formatted by formatter executable 26 | """ 27 | super(STT, self).__init__(strict_mode) 28 | self.token = token 29 | self.endpoint = endpoint 30 | self.reader = reader 31 | self.params = params 32 | 33 | def __call__(self, audio_file): 34 | """ 35 | @param::audio_file: the audio filename/id for reader to retrieve the audio stream 36 | @return: the output formatted by formatter object 37 | """ 38 | if audio_file: 39 | # load asset 40 | audio_stream = self.reader(audio_file) 41 | # check if audio stream is valid 42 | if not audio_stream: 43 | return None 44 | 45 | # init stt client 46 | authenticator = IAMAuthenticator(self.token) 47 | stt = SpeechToTextV1(authenticator=authenticator) 48 | stt.set_service_url(self.endpoint) 49 | 50 | # send the request 51 | try: 52 | response = stt.recognize(audio=audio_stream,**self.params).get_result() 53 | except ApiException as api_ex: 54 | response = {'api_error_message': str(api_ex)} # less likely recoverable if it is STT API error 55 | except Exception as ex: 56 | if self.strict_mode: 57 | raise RuntimeError("*** runtime error caused by input: '%s'"%(audio_file)) # maybe recoverable by retry 58 | else: 59 | response = {'error_message': str(ex)} 60 | return json.dumps(response) if response else None 61 | else: 62 | return None 63 | 64 | def get_return_type(self): 65 | """ 66 | @param::output_col: output column name 67 | @return: the output type struct 68 | """ 69 | return StringType() 70 | 71 | def get_new_client(self): 72 | return STT(token = self.token, 73 | endpoint = self.endpoint, 74 | reader = self.reader, 75 | strict_mode = self.strict_mode, 76 | **self.params) -------------------------------------------------------------------------------- /src/watson_transformer/watson_service_transformer.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " pyspark transformer consume IBM Watson service 4 | " @ref: https://stackoverflow.com/questions/41399399/serialize-a-custom-transformer-using-python-to-be-used-within-a-pyspark-ml-pipel/52467470#52467470 5 | " @ref: https://stackoverflow.com/questions/32331848/create-a-custom-transformer-in-pyspark-ml 6 | " 7 | """ 8 | import os 9 | import numbers 10 | import pandas as pd 11 | from concurrent.futures import ThreadPoolExecutor 12 | from pyspark import keyword_only 13 | from pyspark.sql import functions as F 14 | from pyspark.sql.types import StringType 15 | from pyspark.sql import DataFrame 16 | from pyspark.ml.pipeline import Transformer 17 | from pyspark.ml.param.shared import HasInputCol, HasOutputCol, Param 18 | from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable 19 | 20 | 21 | class WatsonServiceTransformer(Transformer, 22 | HasInputCol, 23 | HasOutputCol, 24 | DefaultParamsReadable, 25 | DefaultParamsWritable): 26 | 27 | """ 28 | " 29 | " set init transformer and set parameters 30 | " 31 | """ 32 | @keyword_only 33 | def __init__(self, 34 | inputCol=None, 35 | outputCol=None, 36 | vectorization=False, 37 | max_workers=5, 38 | service=None): 39 | """ 40 | @param::inputCol: the input column name contains sound file name 41 | @param::outputCol: the output column name 42 | @param::vectorizaton: flag indicate whether enable vectorization 43 | @param::max_workders: the max number of workers for each task 44 | @param::service: the IBM service object 45 | @return: 
none 46 | """ 47 | 48 | super(WatsonServiceTransformer, self).__init__() 49 | self.service = Param(self, "service", None) 50 | self._setDefault(service=None) 51 | self.vectorization = Param(self, "vectorization", False) 52 | self._setDefault(vectorization=False) 53 | self.max_workers = Param(self, "max_workers", 5) 54 | self._setDefault(max_workers=5) 55 | kwargs = self._input_kwargs 56 | self._set(**kwargs) 57 | 58 | # make sure parameter: token, endpoint set properly. 59 | if not callable(service): 60 | raise ValueError('> The service instance provided must be callable object.') 61 | if not isinstance(max_workers, numbers.Number) or max_workers <= 0: 62 | raise ValueError('> The number of maximum workers must greater than 0.') 63 | if not inputCol or not inputCol.strip(): 64 | raise ValueError('> The input column name is required.') 65 | if not outputCol or not outputCol.strip(): 66 | raise ValueError('> The output column name is required.') 67 | 68 | """ 69 | " 70 | " set parameters, called from self._set(), inherited method. 71 | " 72 | """ 73 | @keyword_only 74 | def setParams(self): 75 | """ 76 | @param:: None 77 | @return: none 78 | """ 79 | kwargs = self._input_kwargs 80 | return self._set(**kwargs) 81 | 82 | """ 83 | " 84 | " set whether or not enable vectorized udf 85 | " 86 | """ 87 | def setVectorization(self, value): 88 | """ 89 | @param::value: boolean value indcating enable vectorized udf 90 | @return: None 91 | """ 92 | return self._set(vectorization=value) 93 | 94 | """ 95 | " 96 | " get enable state of vectorized udf 97 | " 98 | """ 99 | def getVectorization(self): 100 | """ 101 | @param::None 102 | @return: vectorization flag 103 | """ 104 | return self.getOrDefault(self.vectorization) 105 | 106 | """ 107 | " 108 | " set the maximum numbers of workder in each task 109 | " 110 | """ 111 | def setMax_workers(self, value): 112 | """ 113 | @param::value: unsigned int indicate the max number of workers 114 | @return: None 115 | """ 116 | return self._set(max_workers=value) 117 | 118 | """ 119 | " 120 | " get the max number of workers in each task 121 | " 122 | """ 123 | def getMax_workers(self): 124 | """ 125 | @param::None 126 | @return: the configured max workers 127 | """ 128 | return self.getOrDefault(self.max_workers) 129 | 130 | """ 131 | " 132 | " set the API service object 133 | " 134 | """ 135 | def setService(self, value): 136 | """ 137 | @param::value: the IBM service API object 138 | @return: None 139 | """ 140 | return self._set(service=value) 141 | 142 | """ 143 | " 144 | " get the API service object 145 | " 146 | """ 147 | def getService(self): 148 | """ 149 | @param:None 150 | @return: the configured service object 151 | """ 152 | return self.getOrDefault(self.service) 153 | 154 | """ 155 | " 156 | " set input columns name 157 | " 158 | """ 159 | def setInputCol(self, value): 160 | """ 161 | @param::value: the input columns name contains the sound file name 162 | @return: None 163 | """ 164 | return self._set(inputCol=value) 165 | 166 | """ 167 | " 168 | " set output columns name 169 | " 170 | """ 171 | def setOutputCol(self, value): 172 | """ 173 | @param::value: the output column name contains the output result 174 | @return: None 175 | """ 176 | return self._set(outputCol=value) 177 | 178 | 179 | """ 180 | " 181 | " perform the transform using provided IBM service api 182 | " 183 | " exploit arrow and vectorized udf 184 | " ref: https://spark.apache.org/docs/latest/sql-pyspark-pandas-with-arrow.html 185 | " 186 | " to prevent udf from being called multiple 
times, use asNondeterministic() 187 | " issue: https://github.com/apache/spark/pull/19929/files/cc309b0ce2496365afd8c602c282e3d84aeed940 188 | " ref:https://stackoverflow.com/questions/58696198/spark-udf-executed-many-times 189 | " 190 | """ 191 | def _transform(self, df:DataFrame) -> DataFrame: 192 | """ 193 | @param::df: the pyspark dataframe 194 | @return: the transformed dataframe 195 | """ 196 | # get the new service instance 197 | service = self.getService() 198 | enable_vectorization = self.getVectorization() 199 | max_workers = max(self.getMax_workers(), 1) 200 | return_type = service.get_return_type() 201 | 202 | # define the (Vectorized) UDF 203 | if enable_vectorization: 204 | # vectorized udf 205 | @F.pandas_udf(return_type, F.PandasUDFType.SCALAR) 206 | def vectorized_udf(input_data): 207 | os.environ['ARROW_PRE_0_15_IPC_FORMAT']='1' 208 | results = [] 209 | with ThreadPoolExecutor(max_workers=max_workers) as executor: 210 | results = executor.map(lambda data:service.get_new_client()(data), input_data) 211 | return pd.Series(results) 212 | else: 213 | # regular udf 214 | default_udf = F.udf(lambda data:service.get_new_client()(data), return_type) 215 | udf = vectorized_udf if enable_vectorization else default_udf 216 | udf = udf.asNondeterministic() # prevent udf from being called mutliple times 217 | df = df.withColumn(self.getOutputCol(), udf(F.col(self.getInputCol()))) 218 | return df 219 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | https://docs.pytest.org/en/latest/goodpractices.html -------------------------------------------------------------------------------- /test/contrib/stt/test_default_stt_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " test default STT response type 4 | " 5 | """ 6 | import json 7 | import pytest 8 | from unittest import mock 9 | from watson_transformer.contrib.stt.default_stt_parser import DefaultSTTParser 10 | 11 | @pytest.fixture(scope='function') 12 | def mock_input(request): 13 | response = { 14 | 'results':[ 15 | { 16 | 'alternatives':[ 17 | {'transcript':'foo'}, 18 | {'transcript':'bar'} 19 | ] 20 | }, 21 | { 22 | 'alternatives':[ 23 | {'transcript':'joe'}, 24 | {'transcript':'joy'} 25 | ] 26 | } 27 | ] 28 | } 29 | return json.dumps(response) 30 | 31 | class TestDefaultSTTParser(): 32 | 33 | def test_invalid_input(self): 34 | # arrange 35 | parser = DefaultSTTParser() 36 | invalide_json = json.dumps({'foo':'bar'}) 37 | for value in [None, invalide_json, 1, 'null', []]: 38 | # act 39 | data = parser(value) 40 | # assert 41 | assert data == None 42 | 43 | def test_valid_input(self, mock_input): 44 | # arrange 45 | parser = DefaultSTTParser() 46 | input_data = mock_input 47 | # act 48 | data = parser(input_data) 49 | # assert 50 | assert data == 'foo. joe.' 
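        # DefaultSTTParser keeps only the first alternative of each result segment, so the remaining alternatives are dropped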
51 | assert 'bar' not in data 52 | assert 'joy' not in data 53 | 54 | -------------------------------------------------------------------------------- /test/contrib/test_response_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " test response base class 4 | " 5 | """ 6 | import pytest 7 | from unittest import mock 8 | from watson_transformer.contrib.response_base import ResponseBase 9 | 10 | class TestResponseBase(): 11 | 12 | def test_callable_not_implemented(self): 13 | # arrange 14 | res = ResponseBase() 15 | # act 16 | with pytest.raises(NotImplementedError) as exinfo: 17 | res(None) 18 | # assert 19 | assert '__call__' in str(exinfo.value) 20 | 21 | def test_get_return_type_implemented(self): 22 | # arrange 23 | res = ResponseBase() 24 | # act 25 | with pytest.raises(NotImplementedError) as exinfo: 26 | res.get_return_type() 27 | # assert 28 | assert 'get_return_type' in str(exinfo.value) -------------------------------------------------------------------------------- /test/service/test_nlu.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " Test NLU Service 4 | " 5 | """ 6 | 7 | import json 8 | import pytest 9 | from unittest import mock 10 | from ibm_cloud_sdk_core import ApiException 11 | from watson_transformer.service.nlu import NLU 12 | 13 | 14 | class TestNLU(): 15 | 16 | def test_nlu_init(self): 17 | # arange 18 | token = 'foo' 19 | endpoint = 'http://www.ibm.com' 20 | feature = {'foo':'bar'} 21 | # action 22 | nlu = NLU(token = token, 23 | endpoint = endpoint, 24 | features = feature) 25 | # assert 26 | assert nlu.token == token 27 | assert nlu.endpoint == endpoint 28 | assert 'features' in nlu.params 29 | assert 'foo' in nlu.params['features'] 30 | assert nlu.params['features']['foo'] == 'bar' 31 | 32 | def test_service_callable_valid_input(self): 33 | # patch where the class is located. 34 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 35 | with mock.patch('watson_transformer.service.nlu.NaturalLanguageUnderstandingV1') as mock_nlu_api: 36 | # arrange 37 | mock_nlu_api.return_value.analyze.return_value.get_result.return_value = {'value':'mock response'} # mock nlu.analyze().get_result() 38 | nlu = NLU(token = 'foo', 39 | endpoint='http://www.foo.com/bar', 40 | features='foo') 41 | # act 42 | response = nlu('I love this game.') 43 | # assert 44 | data = json.loads(response) 45 | assert 'value' in data 46 | assert data['value'] == 'mock response' 47 | 48 | def test_service_callable_invalid_input(self): 49 | # patch where the class is located. 50 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 51 | with mock.patch('watson_transformer.service.nlu.NaturalLanguageUnderstandingV1') as mock_nlu_api: 52 | # arrange 53 | mock_nlu_api.return_value.analyze.return_value.get_result.return_value = {'value':'mock response'} # mock nlu.analyze().get_result() 54 | nlu = NLU(token = 'foo', 55 | endpoint='http://www.foo.com/bar', 56 | features='foo') 57 | for value in [None, '']: 58 | # act 59 | response = nlu(value) 60 | # assert 61 | assert response == None 62 | 63 | def test_service_callable_raise_none_api_exception_strict_mode_on(self): 64 | # patch where the class is located. 
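        # patching the names imported inside watson_transformer.service.nlu keeps the test offline: no real Watson credentials or API calls are needed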
65 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 66 | with mock.patch('watson_transformer.service.nlu.NaturalLanguageUnderstandingV1') as mock_nlu_api: 67 | # arrange 68 | mock_nlu_api.return_value.analyze.side_effect = Exception('NLU API raise exception.') # mock nlu.analyze() 69 | nlu = NLU(token = 'foo', 70 | endpoint='http://www.foo.com/bar', 71 | strict_mode=True, 72 | features='foo') 73 | for value in [' ', ' _', 'one two']: 74 | # act 75 | with pytest.raises(Exception) as exinfo: 76 | response = nlu(value) 77 | # assert 78 | assert value in str(exinfo.value) 79 | assert nlu.strict_mode == True 80 | 81 | def test_service_callable_raise_none_api_exception_strict_mode_off(self): 82 | # patch where the class is located. 83 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 84 | with mock.patch('watson_transformer.service.nlu.NaturalLanguageUnderstandingV1') as mock_nlu_api: 85 | # arrange 86 | mock_nlu_api.return_value.analyze.side_effect = Exception('NLU API raise exception.') # mock nlu.analyze() 87 | nlu = NLU(token = 'foo', 88 | endpoint='http://www.foo.com/bar', 89 | strict_mode=False, 90 | features='foo') 91 | for value in [' ', ' _', 'one two']: 92 | # act 93 | response = nlu(value) 94 | # assert 95 | assert response == None 96 | assert nlu.strict_mode == False 97 | 98 | def test_service_callable_raise_api_exception_strict_mode_on(self): 99 | # patch where the class is located. 100 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 101 | with mock.patch('watson_transformer.service.nlu.NaturalLanguageUnderstandingV1') as mock_nlu_api: 102 | # arrange 103 | mock_nlu_api.return_value.analyze.side_effect = ApiException('NLU API raise exception.') # mock nlu.analyze() 104 | nlu = NLU(token = 'foo', 105 | endpoint='http://www.foo.com/bar', 106 | strict_mode = True, 107 | features='foo') 108 | for value in [' ', ' _', 'one two']: 109 | # act 110 | response = nlu(value) 111 | # assert 112 | assert response == None 113 | assert nlu.strict_mode == True 114 | 115 | def test_service_callable_raise_api_exception_strict_mode_off(self): 116 | # patch where the class is located. 
117 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 118 | with mock.patch('watson_transformer.service.nlu.NaturalLanguageUnderstandingV1') as mock_nlu_api: 119 | # arrange 120 | mock_nlu_api.return_value.analyze.side_effect = ApiException('NLU API raise exception.') # mock nlu.analyze() 121 | nlu = NLU(token = 'foo', 122 | endpoint='http://www.foo.com/bar', 123 | strict_mode = False, 124 | features='foo') 125 | for value in [' ', ' _', 'one two']: 126 | # act 127 | response = nlu(value) 128 | # assert 129 | assert response == None 130 | assert nlu.strict_mode == False 131 | 132 | def test_get_new_client(self): 133 | # arrange 134 | nlu = NLU(token = 'foo', 135 | endpoint='http://www.foo.com/bar', 136 | features='foo') 137 | # action 138 | new_nlu = nlu.get_new_client() 139 | # assert 140 | assert nlu.token == new_nlu.token 141 | assert nlu.endpoint == new_nlu.endpoint 142 | assert 'features' in new_nlu.params 143 | assert new_nlu.params['features'] == 'foo' 144 | assert nlu != new_nlu 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /test/service/test_service_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " test API service base class 4 | " 5 | """ 6 | import pytest 7 | from unittest import mock 8 | from watson_transformer.service.service_base import ServiceBase 9 | 10 | @pytest.fixture(scope='function') 11 | def mock_base_service(request): 12 | # mock the service 13 | return ServiceBase() 14 | 15 | class TestServiceBase(): 16 | 17 | def test_callable(self, mock_base_service): 18 | # arrange 19 | service = mock_base_service 20 | # act 21 | with pytest.raises(NotImplementedError) as exinfo: 22 | service(None) 23 | # assert 24 | assert '__call__' in str(exinfo.value) 25 | 26 | def test_get_return_type(self, mock_base_service): 27 | # arrange 28 | service = mock_base_service 29 | # act 30 | with pytest.raises(NotImplementedError) as exinfo: 31 | service.get_return_type(None) 32 | # assert 33 | assert 'get_return_type' in str(exinfo.value) 34 | 35 | def test_get_new_client(self, mock_base_service): 36 | # arrange 37 | service = mock_base_service 38 | # act 39 | with pytest.raises(NotImplementedError) as exinfo: 40 | service.get_new_client() 41 | # assert 42 | assert 'get_new_client' in str(exinfo.value) 43 | 44 | def test_strict_mode_init(self): 45 | # arrange 46 | valid_values = [True, False] 47 | for value in valid_values: 48 | # act 49 | service_base = ServiceBase(strict_mode = value) 50 | # assert 51 | assert service_base.strict_mode == value 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /test/service/test_stt.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " Test IBM STT Service 4 | " 5 | """ 6 | 7 | import json 8 | import pytest 9 | from unittest import mock 10 | from ibm_cloud_sdk_core.api_exception import ApiException 11 | from watson_transformer.service.stt import STT 12 | 13 | 14 | class TestSTT(): 15 | 16 | def test_nlu_init(self): 17 | # arange 18 | token = 'foo' 19 | endpoint = 'http://www.ibm.com' 20 | reader = lambda x: "foo is speaking to bar" 21 | feature = {'foo':'bar'} 22 | # action 23 | stt = STT(token = token, 24 | endpoint = endpoint, 25 | reader = reader, 26 | features = feature) 27 | # assert 28 | assert stt.token == token 29 | assert stt.endpoint == endpoint 30 | assert 'features' in stt.params 
31 | assert 'foo' in stt.params['features'] 32 | assert stt.params['features']['foo'] == 'bar' 33 | assert stt.reader('foo') == reader('foo') 34 | 35 | def test_service_callable_valid_input(self): 36 | # patch where the class is located. 37 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 38 | with mock.patch('watson_transformer.service.stt.SpeechToTextV1') as mock_stt_api: 39 | # arrange 40 | mock_stt_api.return_value.recognize.return_value.get_result.return_value = {'value':'mock response'} # mock stt.recognize().get_result() 41 | stt = STT(token = 'foo', 42 | endpoint='http://www.foo.com/bar', 43 | reader = lambda x: "foo is speaking to bar.", 44 | features='foo') 45 | # act 46 | response = stt('sample.wav') 47 | # assert 48 | data = json.loads(response) 49 | assert 'value' in data 50 | assert data['value'] == 'mock response' 51 | 52 | def test_service_callable_invalid_input(self): 53 | # patch where the class is located. 54 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 55 | with mock.patch('watson_transformer.service.stt.SpeechToTextV1') as mock_stt_api: 56 | # arrange 57 | mock_stt_api.return_value.recognize.return_value.get_result.return_value = {'value':'mock response'} # mock stt.recognize().get_result() 58 | stt = STT(token = 'foo', 59 | endpoint='http://www.foo.com/bar', 60 | reader = lambda x: "foo is speaking to bar.", 61 | features='foo') 62 | for value in [None, '']: 63 | # act 64 | response = stt(value) 65 | # assert 66 | assert response == None 67 | 68 | def test_service_callable_raise_none_api_exception_strict_mode_on(self): 69 | # patch where the class is located. 70 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 71 | with mock.patch('watson_transformer.service.stt.SpeechToTextV1') as mock_stt_api: 72 | # arrange 73 | mock_stt_api.return_value.recognize.side_effect = Exception('STT API raise exception.') # mock stt.recognize().get_result() 74 | stt = STT(token = 'foo', 75 | endpoint='http://www.foo.com/bar', 76 | reader = lambda x: "foo is speaking to bar.", 77 | strict_mode = True, 78 | features='foo') 79 | for value in ['none_exist.wav', 'invalid.wav']: 80 | # act 81 | with pytest.raises(Exception) as exinfo: 82 | response = stt(value) 83 | # assert 84 | assert value in str(exinfo.value) 85 | assert stt.strict_mode == True 86 | 87 | def test_service_callable_raise_none_api_exception_strict_mode_off(self): 88 | # patch where the class is located. 89 | with mock.patch('watson_transformer.service.nlu.IAMAuthenticator'): 90 | with mock.patch('watson_transformer.service.stt.SpeechToTextV1') as mock_stt_api: 91 | # arrange 92 | mock_stt_api.return_value.recognize.side_effect = Exception('raise general exception.') # mock stt.recognize().get_result() 93 | stt = STT(token = 'foo', 94 | endpoint='http://www.foo.com/bar', 95 | reader = lambda x: "foo is speaking to bar.", 96 | strict_mode = False, 97 | features='foo') 98 | for value in ['none_exist.wav', 'invalid.wav']: 99 | # act 100 | response = stt(value) 101 | # assert 102 | assert 'error_message' in response 103 | assert 'raise general exception.' in response 104 | assert stt.strict_mode == False 105 | 106 | def test_service_callable_raise_api_exception_strict_mode_on(self): 107 | # patch where the class is located. 
108 | with mock.patch('watson_transformer.service.stt.IAMAuthenticator'): 109 | with mock.patch('watson_transformer.service.stt.SpeechToTextV1') as mock_stt_api: 110 | # arrange 111 | mock_stt_api.return_value.recognize.side_effect = ApiException('STT API raise exception.') # mock stt.recognize() raising an API exception 112 | stt = STT(token = 'foo', 113 | endpoint='http://www.foo.com/bar', 114 | reader = lambda x: "foo is speaking to bar.", 115 | strict_mode = True, 116 | features='foo') 117 | for value in ['none_exist.wav', 'invalid.wav']: 118 | # act 119 | response = stt(value) 120 | # assert 121 | assert 'api_error_message' in response 122 | assert 'STT API raise exception.' in response 123 | assert stt.strict_mode == True 124 | 125 | def test_service_callable_raise_api_exception_strict_mode_off(self): 126 | # patch where the class is located. 127 | with mock.patch('watson_transformer.service.stt.IAMAuthenticator'): 128 | with mock.patch('watson_transformer.service.stt.SpeechToTextV1') as mock_stt_api: 129 | # arrange 130 | mock_stt_api.return_value.recognize.side_effect = ApiException('STT API raise exception.') # mock stt.recognize() raising an API exception 131 | stt = STT(token = 'foo', 132 | endpoint='http://www.foo.com/bar', 133 | reader = lambda x: "foo is speaking to bar.", 134 | strict_mode = False, 135 | features='foo') 136 | for value in ['none_exist.wav', 'invalid.wav']: 137 | # act 138 | response = stt(value) 139 | # assert 140 | assert 'api_error_message' in response 141 | assert 'STT API raise exception.' in response 142 | assert stt.strict_mode == False 143 | 144 | def test_get_new_client(self): 145 | # arrange 146 | stt = STT(token = 'foo', 147 | endpoint='http://www.foo.com/bar', 148 | reader = lambda x: "foo is speaking to bar.", 149 | strict_mode=False, 150 | features='foo') 151 | # action 152 | new_stt = stt.get_new_client() 153 | # assert 154 | assert stt.token == new_stt.token 155 | assert stt.endpoint == new_stt.endpoint 156 | assert 'features' in new_stt.params 157 | assert new_stt.params['features'] == 'foo' 158 | assert stt != new_stt 159 | assert stt.strict_mode == new_stt.strict_mode 160 | assert stt.strict_mode == False 161 | 162 | def test_reader_raise_exception(self): 163 | # arrange 164 | reader = mock.MagicMock(side_effect=Exception('failed to read the file.')) 165 | stt = STT(token = 'foo', 166 | endpoint='http://www.foo.com/bar', 167 | reader = reader, 168 | features='foo') 169 | # act 170 | with pytest.raises(Exception) as exinfo: 171 | stt('sample.wav') 172 | # assert 173 | assert 'failed to read the file.' 
in str(exinfo.value) 174 | 175 | def test_reader_return_none_stream(self): 176 | # arrange 177 | reader = mock.MagicMock(return_value = None) 178 | stt = STT(token = 'foo', 179 | endpoint='http://www.foo.com/bar', 180 | reader = reader, 181 | features='foo') 182 | # act 183 | response = stt('sample.wav') 184 | # assert 185 | assert response == None 186 | 187 | 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /test/test_json_transformer.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " test watson service transformer class 4 | " 5 | """ 6 | import pytest 7 | from unittest import mock 8 | from watson_transformer.json_transformer import JSONTransformer 9 | 10 | 11 | 12 | @pytest.fixture(scope='function') 13 | def mock_service(request): 14 | # mock the service 15 | service = mock.MagicMock(return_value='foo response') 16 | return lambda : service 17 | 18 | 19 | class TestJSONTransformer(): 20 | 21 | def test_init_input_col_with__valid_value(self, mock_service): 22 | # arrange 23 | mocked_service = mock_service() 24 | column_name = "input_column_name" 25 | # act 26 | for column_name in ["foo", "foo bar", "foo_bar", " _"]: 27 | transformer = JSONTransformer(inputCol=column_name, 28 | outputCol='output_column', 29 | removeInputCol=True, 30 | parser=mocked_service) 31 | # assert 32 | assert transformer.getInputCol() == column_name 33 | 34 | def test_init_input_col_with_invalid_value(self, mock_service): 35 | # arrange 36 | mocked_service = mock_service() 37 | column_name = None 38 | # act 39 | for column_name in [None, "", " ", " "]: 40 | with pytest.raises(ValueError) as exception: 41 | transformer = JSONTransformer(inputCol=column_name, 42 | outputCol='output_column', 43 | removeInputCol=True, 44 | parser=mocked_service) 45 | # assert 46 | assert "input column name" in str(exception.value) 47 | 48 | def test_init_output_col_with_valid_value(self, mock_service): 49 | # arrange 50 | mocked_service = mock_service() 51 | column_name = "input_column_name" 52 | # act 53 | for column_name in ["foo", "foo bar", "foo_bar", " _"]: 54 | transformer = JSONTransformer(inputCol=column_name, 55 | outputCol='output_column', 56 | removeInputCol=True, 57 | parser=mocked_service) 58 | 59 | def test_init_output_col_with_invalid_value(self, mock_service): 60 | # arrange 61 | mocked_service = mock_service() 62 | column_name = None 63 | # act 64 | for column_name in [None, "", " ", " "]: 65 | with pytest.raises(ValueError) as exception: 66 | JSONTransformer(inputCol='input_column', 67 | outputCol=column_name, 68 | removeInputCol=True, 69 | parser=mocked_service) 70 | # assert 71 | assert "output column name" in str(exception.value) 72 | 73 | def test_init_remove_input_column_with_valid_value(self, mock_service): 74 | # arrange 75 | mocked_service = mock_service() 76 | valid_values = [0, 1, True, False] 77 | expect_values = [False, True, True, False] 78 | # act 79 | for i in range(len(valid_values)): 80 | transformer = JSONTransformer(inputCol='input_column', 81 | outputCol='output_column', 82 | removeInputCol=valid_values[i], 83 | parser=mocked_service) 84 | # assert 85 | assert transformer.getRemoveInputCol() == expect_values[i] 86 | 87 | def test_init_remove_input_column_with_default_value(self, mock_service): 88 | # arrange 89 | mocked_service = mock_service() 90 | # act 91 | transformer = JSONTransformer(inputCol='input column', 92 | outputCol='output column', 93 | parser=mocked_service) 94 | # assert 95 
| assert transformer.getRemoveInputCol() == False 96 | 97 | def test_init_valid_service(self): 98 | # arrange 99 | mocked_service = lambda x: x+1 100 | # act 101 | transformer = JSONTransformer(inputCol='input column', 102 | outputCol='output column', 103 | parser=mocked_service) 104 | # assert 105 | provided_service = transformer.getParser() 106 | assert provided_service(10) == 11 107 | 108 | 109 | def test_init_none_callable_service(self): 110 | # arrange 111 | invalid_services = [None, 12, "12"] 112 | # act 113 | for i in range(len(invalid_services)): 114 | with pytest.raises(ValueError) as exinfo: 115 | JSONTransformer(inputCol='input column', 116 | outputCol='output column', 117 | parser=invalid_services[i]) 118 | # assert 119 | assert "parser instance" in str(exinfo.value) and "callable" in str(exinfo.value) -------------------------------------------------------------------------------- /test/test_watson_service_transformer.py: -------------------------------------------------------------------------------- 1 | """ 2 | " 3 | " test watson service transformer class 4 | " 5 | """ 6 | import pytest 7 | from unittest import mock 8 | from watson_transformer.watson_service_transformer import WatsonServiceTransformer 9 | 10 | 11 | 12 | @pytest.fixture(scope='function') 13 | def mock_service(request): 14 | # mock the service 15 | service = mock.MagicMock(return_value='foo response') 16 | return lambda : service 17 | 18 | 19 | class TestWatsonServiceTransformer(): 20 | 21 | def test_init_input_col_with__valid_value(self, mock_service): 22 | # arrange 23 | mocked_service = mock_service() 24 | column_name = "input_column_name" 25 | # act 26 | for column_name in ["foo", "foo bar", "foo_bar", " _"]: 27 | transformer = WatsonServiceTransformer(inputCol=column_name, 28 | outputCol='output_column', 29 | vectorization=True, 30 | max_workers = 10, 31 | service=mocked_service) 32 | # assert 33 | assert transformer.getInputCol() == column_name 34 | 35 | def test_init_input_col_with_invalid_value(self, mock_service): 36 | # arrange 37 | mocked_service = mock_service() 38 | column_name = None 39 | # act 40 | for column_name in [None, "", " ", " "]: 41 | with pytest.raises(ValueError) as exception: 42 | _ = WatsonServiceTransformer(inputCol=column_name, 43 | outputCol='output_column', 44 | vectorization=True, 45 | max_workers = 10, 46 | service=mocked_service) 47 | # assert 48 | assert "input column name" in str(exception.value) 49 | 50 | def test_init_output_col_with_valid_value(self, mock_service): 51 | # arrange 52 | mocked_service = mock_service() 53 | column_name = "input_column_name" 54 | # act 55 | for column_name in ["foo", "foo bar", "foo_bar", " _"]: 56 | transformer = WatsonServiceTransformer(inputCol='input column', 57 | outputCol=column_name, 58 | vectorization=True, 59 | max_workers = 10, 60 | service=mocked_service) 61 | # assert 62 | assert transformer.getOutputCol() == column_name 63 | 64 | def test_init_output_col_with_invalid_value(self, mock_service): 65 | # arrange 66 | mocked_service = mock_service() 67 | column_name = None 68 | # act 69 | for column_name in [None, "", " ", " "]: 70 | with pytest.raises(ValueError) as exception: 71 | WatsonServiceTransformer(inputCol="input column", 72 | outputCol=column_name, 73 | vectorization=True, 74 | max_workers = 10, 75 | service=mocked_service) 76 | # assert 77 | assert "output column name" in str(exception.value) 78 | 79 | def test_init_max_workers_with_valid_value(self, mock_service): 80 | # arrange 81 | mocked_service = mock_service() 82 | 
max_workers = [1,2,3,4,5] 83 | expect_max_workers = [1,2,3,4,5] 84 | # act 85 | for i in range(len(max_workers)): 86 | transformer = WatsonServiceTransformer(inputCol='input column', 87 | outputCol='output column', 88 | vectorization=True, 89 | max_workers = max_workers[i], 90 | service=mocked_service) 91 | # assert 92 | assert transformer.getMax_workers() == expect_max_workers[i] 93 | 94 | def test_init_max_workers_with_default_value(self, mock_service): 95 | # arrange 96 | mocked_service = mock_service() 97 | expected_default_value = 5 98 | # act 99 | transformer = WatsonServiceTransformer(inputCol='input column', 100 | outputCol='output column', 101 | vectorization=True, 102 | service=mocked_service) 103 | # assert 104 | assert transformer.getMax_workers() == expected_default_value 105 | 106 | def test_init_max_workers_with_invalid_value(self, mock_service): 107 | # arrange 108 | mocked_service = mock_service() 109 | max_workers = [None, "1", -1, 0, -100] 110 | # act 111 | for i in range(len(max_workers)): 112 | with pytest.raises(ValueError) as exinfo: 113 | WatsonServiceTransformer(inputCol='input column', 114 | outputCol='output column', 115 | vectorization=True, 116 | max_workers = max_workers[i], 117 | service=mocked_service) 118 | # assert 119 | assert "maximum workers" in str(exinfo.value) 120 | 121 | def test_init_vectorization_with_valid_value(self, mock_service): 122 | # arrange 123 | mocked_service = mock_service() 124 | vectorizations = [0, 1, True, False] 125 | expect_vectorizations = [False, True, True, False] 126 | # act 127 | for i in range(len(vectorizations)): 128 | transformer = WatsonServiceTransformer(inputCol='input column', 129 | outputCol='output column', 130 | vectorization=vectorizations[i], 131 | max_workers = 10, 132 | service=mocked_service) 133 | # assert 134 | assert transformer.getVectorization() == expect_vectorizations[i] 135 | 136 | def test_init_vectorization_with_default_value(self, mock_service): 137 | # arrange 138 | mocked_service = mock_service() 139 | # act 140 | transformer = WatsonServiceTransformer(inputCol='input column', 141 | outputCol='output column', 142 | service=mocked_service) 143 | # assert 144 | assert transformer.getVectorization() == False 145 | 146 | def test_init_valid_service(self): 147 | # arrange 148 | mocked_service = lambda x: x+1 149 | # act 150 | transformer = WatsonServiceTransformer(inputCol='input column', 151 | outputCol='output column', 152 | service=mocked_service) 153 | # assert 154 | provided_service = transformer.getService() 155 | assert provided_service(10) == 11 156 | 157 | 158 | def test_init_none_callable_service(self): 159 | # arrange 160 | values = [None, 12, "12"] 161 | # act 162 | for i in range(len(values)): 163 | with pytest.raises(ValueError) as exinfo: 164 | WatsonServiceTransformer(inputCol='input column', 165 | outputCol='output column', 166 | service=values[i]) 167 | # assert 168 | assert "service instance" in str(exinfo.value) and "callable" in str(exinfo.value) --------------------------------------------------------------------------------
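Usage sketch: the snippet below shows one way the classes above compose into a Spark ML pipeline for the speech-to-text path. It is a minimal sketch, assuming a running SparkSession and a dataframe `df` with an `audio_file` column of local file paths; the `<api-key>` and `<stt-endpoint-url>` values, the column names, and the `vectorization`/`max_workers` settings are illustrative placeholders, not prescribed by the package.

```python
# minimal sketch: wire STT -> JSON parsing into a Spark ML pipeline
from pyspark.ml import Pipeline
from watson_transformer.service import STT
from watson_transformer.contrib.stt import DefaultSTTParser
from watson_transformer.contrib.readers import local_fs_reader
from watson_transformer.watson_service_transformer import WatsonServiceTransformer
from watson_transformer.json_transformer import JSONTransformer

# call the Watson STT API for each row; the raw JSON response lands in 'stt_json'
stt_stage = WatsonServiceTransformer(inputCol='audio_file',
                                     outputCol='stt_json',
                                     vectorization=True,   # use the pandas UDF + thread pool path
                                     max_workers=5,
                                     service=STT(token='<api-key>',             # placeholder credential
                                                 endpoint='<stt-endpoint-url>', # placeholder endpoint
                                                 reader=local_fs_reader))

# parse the JSON response into a plain transcript column
parse_stage = JSONTransformer(inputCol='stt_json',
                              outputCol='transcript',
                              removeInputCol=True,
                              parser=DefaultSTTParser())

pipeline = Pipeline(stages=[stt_stage, parse_stage])
df_out = pipeline.fit(df).transform(df)   # df: dataframe with an 'audio_file' column
```

An NLU stage follows the same pattern: a second WatsonServiceTransformer wrapping NLU, a JSONTransformer with the struct-returning NLU parser, and a FlatColumnTransformer to spread the resulting struct column into flat columns.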