├── .gitignore
├── LICENSE
├── README.md
├── example
    ├── .gitignore
    ├── README.md
    ├── __init__.py
    ├── data
    │   └── hotel_comment.csv
    ├── docker-compose.yml
    ├── model
    │   └── __init__.py
    ├── model_predict.py
    ├── model_train.py
    ├── sa_client.py
    ├── sa_model2tf_serving_model.py
    ├── sa_server.py
    ├── sa_tf_serving_api_client.py
    ├── sa_ui.py
    └── tf_model
    │   └── __init__.py
├── litNlp
    ├── .gitignore
    ├── __init__.py
    ├── model_structure
    │   ├── BiLSTM.py
    │   ├── GRU.py
    │   ├── Model_TextCNN.py
    │   ├── TextCNN.py
    │   ├── TextCNN_m.py
    │   └── __init__.py
    ├── predict.py
    └── train.py
├── pic
    ├── auc_2poch.png
    ├── logo.png
    ├── server.png
    ├── tools.png
    └── ui.png
├── requirement.txt
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | .idea
  6 | Customer_Satisfaction_Analysis/.idea
  7 | model/__pycache__
  8 | # C extensions
  9 | *.so
 10 | dist
 11 | litNlp.egg-info
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | MANIFEST
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Django stuff:
 58 | *.log
 59 | local_settings.py
 60 | db.sqlite3
 61 | 
 62 | # Flask stuff:
 63 | instance/
 64 | .webassets-cache
 65 | 
 66 | # Scrapy stuff:
 67 | .scrapy
 68 | 
 69 | # Sphinx documentation
 70 | docs/_build/
 71 | 
 72 | # PyBuilder
 73 | target/
 74 | 
 75 | # Jupyter Notebook
 76 | .ipynb_checkpoints
 77 | 
 78 | # pyenv
 79 | .python-version
 80 | 
 81 | # celery beat schedule file
 82 | celerybeat-schedule
 83 | 
 84 | # SageMath parsed files
 85 | *.sage.py
 86 | 
 87 | # Environments
 88 | .env
 89 | .venv
 90 | env/
 91 | venv/
 92 | ENV/
 93 | env.bak/
 94 | venv.bak/
 95 | 
 96 | # Spyder project settings
 97 | .spyderproject
 98 | .spyproject
 99 | 
100 | # Rope project settings
101 | .ropeproject
102 | 
103 | # mkdocs documentation
104 | /site
105 | 
106 | # mypy
107 | .mypy_cache/
108 | ### Example user template template
109 | ### Example user template
110 | 
111 | # IntelliJ project files
112 | .idea
113 | *.iml
114 | out
115 | gen
116 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 |   <img src="https://github.com/CarryChang/litNlp/blob/master/pic/logo.png"><br>
  3 | </div>
  4 | 
  5 | -----------------
  6 | ## litNlp: A Fast Tool for Sentiment Analysis with Tensorflow2
  7 | [![996.icu](https://img.shields.io/badge/link-996.icu-red.svg)](https://996.icu)
  8 | [![PyPI Latest Release](https://img.shields.io/pypi/v/litNlp.svg)](https://pypi.org/project/litNlp/)
  9 | [![Downloads](https://pepy.tech/badge/litnlp)](https://pepy.tech/project/litnlp)
 10 | [![Downloads](https://pepy.tech/badge/litnlp/month)](https://pepy.tech/project/litnlp/month)
 11 | [![Downloads](https://pepy.tech/badge/litnlp/week)](https://pepy.tech/project/litnlp/week)
 12 | 
 13 | 
 14 | # litNlp 简介
 15 | 
 16 | litNlp 是兼容最新版 Tensorflow 2.0 实现的一个轻量级的深度情感极性推理模型，使用字符级代替词语级进一步提升训练和推理速度，可以实现细粒度的多级别情感极性训练和预测，TF2 下 GPU 和 CPU 平台都能直接安装运行，是搭建 NLP 情感分析和分类模型 Baseline 的快速方案。
 17 | 
 18 | 	1. 内置深度学习情感分析模型。
 19 | 	2. 直接提供模型训练，默认 Text-CNN 字符级卷积网络作为 baseline ，自带早停操作，使用少的参数即可开始训练多分类模型。
 20 | 	3. 使用 Streamlit 快速对模型进行 UI 演示。
 21 | 	4. 增加 TF Serving 的转化和部署。
 22 | 	5. 增加 docker-compose up 的启动方式
 23 | 
 24 | 
 25 | ## 直接使用 emample/sa_ui.py 进行前端 ui 展示效果
 26 | 
 27 | ```python
 28 |     # 安装 streamlit 之后直接运行脚本
 29 |     streamlit run sa_ui.py
 30 | ```
 31 | 
 32 | <div align=center><img  src="https://github.com/CarryChang/litNlp/blob/master/pic/ui.png"></div>
 33 | 
 34 | ## 使用方法
 35 | > 1. pip install  litNlp
 36 | > 2. 模型需要先通过训练，保存在 sa_model 里面，然后就可以批预测，具体的使用见 example 文件内容
 37 | 
 38 | ```python
 39 | 	from litNlp.predict import SA_Model_Predict
 40 | 	import numpy as np
 41 | 	
 42 | 	# 加载模型的字典项
 43 | 	tokenize_path = 'model/tokenizer.pickle'
 44 | 	# train_method : 模型训练方式，默认 textcnn ，可选：bilstm , gru
 45 | 	train_method = 'textcnn'
 46 | 	# 模型的保存位置，后续用于推理
 47 | 	sa_model_path_m = 'model/{}.h5'.format(train_method)
 48 | 	# 开始输入待测样例
 49 | 	predict_text = ['这个我不喜欢', '这个我喜欢不']
 50 | 	# 加载模型
 51 | 	model = SA_Model_Predict(tokenize_path, sa_model_path_m, max_len=100)
 52 | 	# 开始推理
 53 | 	sa_score = model.predict(predict_text)
 54 | 	# 情感极性概率
 55 | 	print(np.asarray(sa_score)[:,1])
 56 | 	# 情感label输出
 57 | 	print(np.argmax(np.asarray(sa_score), axis=1))
 58 | 
 59 | ```
 60 | 
 61 | ## 参数解释
 62 | ```python
 63 |     # 最大句子长度
 64 |     maxlen = 100
 65 |     # 最大的tokenizer字典长度
 66 |     max_words = 1000
 67 |     # 设置embedding大小
 68 |     embedding_dim = 300
 69 |     # 模型的保存位置，后续用于推理
 70 |     sa_model_path_m = 'sa_model/c_cnn_m.h5'
 71 |     # 离线保存tokenizer
 72 |     tokenize_path ='sa_model/tokenizer.pickle'
 73 |     # 分类的类别数
 74 |     num_classes = 2
 75 |     # train_method : 模型训练方式，默认textcnn，可选：bilstm, gru
 76 |     train_method = 'textcnn'
 77 | ```
 78 | 
 79 | ## 2 个 epoch 的二分类性能
 80 | 
 81 | <div align=center><img  src="https://github.com/CarryChang/litNlp/blob/master/pic/auc_2poch.png"></div>
 82 | 
 83 | ## jupyter 实验
 84 | 
 85 | >  情感分析，优化语义的情感推理
 86 | <div align=center><img  src="https://github.com/CarryChang/litNlp/blob/master/pic/tools.png"></div>
 87 | 
 88 | ## Flask Gunicorn 模型部署
 89 | python sa_server.py 即可对训练的情感分析模型进行部署，模型首次推理需要预热，后续推理耗时在 200ms 之内。
 90 | 
 91 | <div align=center><img  src="https://github.com/CarryChang/litNlp/blob/master/pic/server.png"></div>
 92 | 
 93 | ## Tensorflow Serving 模型部署
 94 | 
 95 | 利用 python example/sa_model2tf_serving_model.py 进行模型转换之后即可直接进行 TF Serving 的服务部署。
 96 | 
 97 | 首先拉取对应版本的 TF Serving Docker
 98 | 
 99 |     docker pull tensorflow/serving:2.3.0
100 |     
101 | 直接利用 Docker 加载转换之后的模型即可完成模型部署，TensorFlow Serving 会自动选择版本号最大的模型进行载入。
102 | 
103 | Docker 命令行的 Dev 启动模式
104 | 
105 |     docker run -t --rm -p 9500:8500 -p:9501:8501 \
106 |     -v "$(pwd)/tf_model/:/models/textcnn" \
107 |     -e MODEL_NAME=textcnn -tensorflow_inter_op_parallelism=4 \
108 |     tensorflow/serving:2.3.0
109 |  
110 | 
111 | Docker 命令行的 Pro 启动模式
112 | 
113 |     docker run -d --rm -p 9500:8500 -p:9501:8501 \
114 |     -v "$(pwd)/tf_model/:/models/textcnn" \
115 |     -e MODEL_NAME=textcnn -tensorflow_inter_op_parallelism=4 \
116 |     tensorflow/serving:2.3.0
117 | 
118 | 或者在 yml 所在的文件夹下增加直接使用 docker-compose up 进行服务的启动。
119 | 
120 | 服务请求： 
121 | 部署之后使用 python sa_tf_serving_api_client.py 进行 TF serving 服务的调用。
122 |  
123 | 


--------------------------------------------------------------------------------
/example/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | .idea
  6 | Customer_Satisfaction_Analysis/.idea
  7 | model/__pycache__
  8 | # C extensions
  9 | *.so
 10 | dist
 11 | litNlp.egg-info
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | MANIFEST
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Django stuff:
 58 | *.log
 59 | local_settings.py
 60 | db.sqlite3
 61 | 
 62 | # Flask stuff:
 63 | instance/
 64 | .webassets-cache
 65 | 
 66 | # Scrapy stuff:
 67 | .scrapy
 68 | 
 69 | # Sphinx documentation
 70 | docs/_build/
 71 | 
 72 | # PyBuilder
 73 | target/
 74 | 
 75 | # Jupyter Notebook
 76 | .ipynb_checkpoints
 77 | 
 78 | # pyenv
 79 | .python-version
 80 | 
 81 | # celery beat schedule file
 82 | celerybeat-schedule
 83 | 
 84 | # SageMath parsed files
 85 | *.sage.py
 86 | 
 87 | # Environments
 88 | .env
 89 | .venv
 90 | env/
 91 | venv/
 92 | ENV/
 93 | env.bak/
 94 | venv.bak/
 95 | 
 96 | # Spyder project settings
 97 | .spyderproject
 98 | .spyproject
 99 | 
100 | # Rope project settings
101 | .ropeproject
102 | 
103 | # mkdocs documentation
104 | /site
105 | 
106 | # mypy
107 | .mypy_cache/
108 | ### Example user template template
109 | ### Example user template
110 | 
111 | # IntelliJ project files
112 | .idea
113 | *.iml
114 | out
115 | gen
116 | 


--------------------------------------------------------------------------------
/example/README.md:
--------------------------------------------------------------------------------
1 | ### litNlp Demo1. 电商情感极性输出
2 | 
3 | 2. 酒店情感极性输出


--------------------------------------------------------------------------------
/example/__init__.py:
--------------------------------------------------------------------------------
1 | name = "example_litNlp"


--------------------------------------------------------------------------------
/example/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3"
 2 | 
 3 | services:
 4 |   tfserving_predict:
 5 |     image: tensorflow/serving:2.3.0
 6 |     environment:
 7 |       MODEL_NAME: textcnn
 8 |     ports:
 9 |       - 9500:8500
10 |       - 9501:8501
11 |     volumes:
12 |       - ./tf_model/:/models/textcnn
13 |     restart: on-failure
14 |     entrypoint:
15 |       - /usr/bin/tf_serving_entrypoint.sh
16 |       - --tensorflow_inter_op_parallelism=2
17 |  


--------------------------------------------------------------------------------
/example/model/__init__.py:
--------------------------------------------------------------------------------
1 | name = "example_litNlp"


--------------------------------------------------------------------------------
/example/model_predict.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 |  
 3 | 
 4 | from litNlp.predict import SA_Model_Predict
 5 | import numpy as np
 6 | 
 7 | # 加载模型的字典项
 8 | tokenize_path = 'model/tokenizer.pickle'
 9 | # train_method : 模型训练方式，默认 textcnn ，可选：bilstm , gru
10 | train_method = 'textcnn'
11 | # 模型的保存位置，后续用于推理
12 | sa_model_path_m = 'model/{}.h5'.format(train_method)
13 | # 开始输入待测样例
14 | predict_text = ['这个我不喜欢', '这个我喜欢不']
15 | # 加载模型
16 | model = SA_Model_Predict(tokenize_path, sa_model_path_m, max_len=100)
17 | # 开始推理
18 | sa_score = model.predict(predict_text)
19 | # 情感极性概率
20 | print(np.asarray(sa_score)[:,1])
21 | # 情感label输出
22 | print(np.argmax(np.asarray(sa_score), axis=1))


--------------------------------------------------------------------------------
/example/model_train.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time: 2020/6/21 0021 20:46
 3 | import pandas as pd
 4 | from litNlp.train import SA_Model_Train
 5 | # e_comment
 6 | # train_data = pd.read_csv('data/ebusiness_comment.csv')
 7 | # hotel
 8 | train_data = pd.read_csv('data/hotel_comment.csv')
 9 | # 进行字符级处理
10 | train_data['text_cut'] = train_data['text'].apply(lambda x: " ".join(list(x)))
11 | # 最大句子长度
12 | maxlen = 100
13 | # 设置 tokenizer 字典大小
14 | max_words = 1000
15 | # 设置随机 embedding 大小
16 | embedding_dim = 300
17 | # train_method : 模型训练方式，默认 textcnn ，可选：bilstm , gru
18 | train_method = 'textcnn'
19 | # 模型的保存位置，后续用于推理
20 | sa_model_path_m = 'model/{}.h5'.format(train_method)
21 | # 离线保存 tokenizer
22 | tokenize_path ='model/tokenizer.pickle'
23 | # train: evaluate默认在训练完毕之后开启计算
24 | label = train_data['label']
25 | train_data = train_data['text_cut']
26 | model = SA_Model_Train(max_words, embedding_dim, maxlen, tokenize_path, sa_model_path_m, train_method)
27 | # 模型使用两极情感标注，定义 2 类标签类别，参数可以调节
28 | model.train(train_data, label, num_classes=2, batch_size=256, epochs=2, verbose=1, evaluate=True)


--------------------------------------------------------------------------------
/example/sa_client.py:
--------------------------------------------------------------------------------
 1 | #! -*- coding: utf-8 -*-
 2 | import requests
 3 | import time
 4 | import json
 5 | 
 6 | 
 7 | def sa_api_request(content):
 8 |     st = time.time()
 9 |     api_url = 'http://127.0.0.1:5021/sa_api'
10 |     para = {"content": content}
11 |     model_result = requests.post(api_url, data=json.dumps(para)).json()
12 |     print(model_result)
13 |     print('request time used:{}'.format(time.time() - st))
14 | 
15 | 
16 | if __name__ == '__main__':
17 |     content = '这家酒店真的不错'
18 |     # 接口请求
19 |     sa_api_request(content)
20 | 


--------------------------------------------------------------------------------
/example/sa_model2tf_serving_model.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import tensorflow as tf
 4 | 
 5 | 
 6 | # 待转化的模型，默认 textcnn ，可选：bilstm , gru
 7 | train_method = 'textcnn'
 8 | # 模型的保存位置，后续用于推理
 9 | sa_model_path_m = 'model/{}.h5'.format(train_method)
10 | # 模型加载
11 | model = tf.keras.models.load_model(sa_model_path_m)
12 | # TF Serving 按照最大的 tag 进行模型的热更新，设置模型的tag
13 | tag = 1
14 | # 转化之后的模型路径
15 | save_path = "tf_model/{}/".format(tag)
16 | # 保存为 tf serving 加载的 model 形式
17 | model.save(save_path, save_format='tf') 
18 | 
19 | 


--------------------------------------------------------------------------------
/example/sa_server.py:
--------------------------------------------------------------------------------
 1 | #! -*- coding: utf-8 -*-
 2 | from flask_restful import Resource, Api, request
 3 | from litNlp.predict import SA_Model_Predict
 4 | from flask import Flask
 5 | import json
 6 | 
 7 | app = Flask(__name__)
 8 | api = Api(app)
 9 | 
10 | # 初始化模型，第一次推理需要预热
11 | tokenize_path = 'model/tokenizer.pickle'
12 | sa_model_path_m = 'model/model.h5'
13 | # 模型加载
14 | model = SA_Model_Predict(tokenize_path, sa_model_path_m, max_len=100)
15 | 
16 | 
17 | class sa_post_api(Resource):
18 |     def post(self):
19 |         # 接收对象
20 |         parser = json.loads(request.get_data())
21 |         content = str(parser['content'])
22 |         sa_score = round(float(model.predict([content])[0][1]), 5)
23 |         show_data = dict()
24 |         show_data['sa_score'] = sa_score
25 |         show_data['status'] = 1
26 |         if sa_score > 0.5:
27 |             show_data['label'] = '积极'
28 |         elif sa_score < 0.5:
29 |             show_data['label'] = '消极'
30 |         else:
31 |             show_data['label'] = '中性'
32 |         # print(show_data)
33 |         return show_data
34 | 
35 | 
36 | # 定义 POST 接口的请求信息
37 | api.add_resource(sa_post_api, '/sa_api')
38 | 
39 | if __name__ == '__main__':
40 |     app.run(port='5021')
41 | 


--------------------------------------------------------------------------------
/example/sa_tf_serving_api_client.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @USER: CarryChang
 3 | 
 4 | from tensorflow.keras.preprocessing.sequence import pad_sequences
 5 | import numpy as np
 6 | import requests
 7 | import pickle
 8 | import json
 9 | 
10 | # 设置单个用户评论的最大句子长度
11 | maxlen = 100
12 | # 保存向量字典
13 | tokenize_path = 'model/tokenizer.pickle'
14 | 
15 | predict_text = ['这个环境不喜欢', '这个环境喜欢不']
16 | # 特征处理
17 | with open(tokenize_path, 'rb') as tokenize_save:
18 |     tokenizer_load = pickle.load(tokenize_save)
19 | 
20 | # 字符级
21 | tk_list = [list(text) for text in predict_text]
22 | # 字符填充
23 | test_text = pad_sequences(tokenizer_load.texts_to_sequences(tk_list), maxlen)
24 | 
25 | # 多个评论进行推理
26 | data = {'instances': test_text.tolist()}
27 | # tf_model_textcnn 模型部署，REST 的访问端口为 9501
28 | predict_url = 'http://localhost:9501/v1/models/textcnn:predict'
29 | r = requests.post(predict_url, data=json.dumps(data))
30 | # 直接提取矩阵中积极的情感
31 | print("待测样例的情感值是：")
32 | print(np.array(r.json()['predictions'])[:, 1])
33 | 


--------------------------------------------------------------------------------
/example/sa_ui.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from litNlp.predict import SA_Model_Predict
 3 | import streamlit as st
 4 | 
 5 | # 初始化模型
 6 | tokenize_path = 'model/tokenizer.pickle'
 7 | sa_model_path_m = 'model/model.h5'
 8 | model = SA_Model_Predict(tokenize_path, sa_model_path_m, max_len=100)
 9 | # 不用项目自动重启
10 | st.subheader('文本情感分析')
11 | # st.write('文本情感分析')
12 | # 接受前端的内容显示
13 | comment_input = st.text_input('请输入一行测试文本: ')
14 | # 开始处理内容
15 | if comment_input != '':
16 |     # 文本处理
17 |     comment = str(comment_input).strip()
18 |     # 添加等待，并开始预测
19 |     with st.spinner('Predicting...'):
20 |         sa_score = float(model.predict([comment])[0][1])
21 |         show_data = dict()
22 |         show_data['status'] = 1
23 |         show_data['sa_score'] = sa_score
24 |         if sa_score > 0.5:
25 |             show_data['label'] = '积极'
26 |         elif sa_score < 0.5:
27 |             show_data['label'] = '消极'
28 |         else:
29 |             show_data['label'] = '中性'
30 |     # 最后展示内容
31 |     st.write('分析结果: ')
32 |     st.write(show_data)
33 | 


--------------------------------------------------------------------------------
/example/tf_model/__init__.py:
--------------------------------------------------------------------------------
1 | name = "tf_serving"


--------------------------------------------------------------------------------
/litNlp/.gitignore:
--------------------------------------------------------------------------------
 1 | # Created by .ignore support plugin (hsz.mobi)
 2 | ### Example user template template
 3 | ### Example user template
 4 | 
 5 | # IntelliJ project files
 6 | .idea
 7 | *.iml
 8 | out
 9 | gen
10 | __pycache__/
11 | model/__pycache__


--------------------------------------------------------------------------------
/litNlp/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time: 2020/6/22 0022 12:08


--------------------------------------------------------------------------------
/litNlp/model_structure/BiLSTM.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time: 2020/6/20 0020 10:38
 3 | from tensorflow.keras.layers import Dense, Embedding, Flatten,Dropout,Convolution1D,BatchNormalization
 4 | from tensorflow.keras.optimizers import Adam,RMSprop
 5 | from tensorflow.keras.models import Sequential
 6 | from tensorflow.keras.layers import Bidirectional,Activation,LSTM
 7 | class BILSTM_Model:
 8 |     def create_model(self, max_words,embedding_dim, maxlen, n_class=2):
 9 |         model = Sequential()
10 |         #  embedding layer
11 |         model.add(Embedding(max_words, embedding_dim, input_length=maxlen))
12 |         # BiLSTM
13 |         model.add(Bidirectional(LSTM(units=32, return_sequences=True)))
14 |         model.add(LSTM(units=16, return_sequences=False))
15 |         model.add(Dense(512, activation='relu'))
16 |         model.add(Dropout(0.5))
17 |         model.add(Dense(n_class,  activation='softmax'))
18 |         model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])
19 |         return model


--------------------------------------------------------------------------------
/litNlp/model_structure/GRU.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time: 2020/6/20 0020 10:38
 3 | from tensorflow.keras.layers import Dense, Embedding, Flatten,Dropout,Convolution1D,BatchNormalization
 4 | from tensorflow.keras.optimizers import Adam,RMSprop
 5 | from tensorflow.keras.models import Sequential
 6 | from tensorflow.keras.layers import Activation,MaxPool1D,Input,GRU
 7 | class GRU_Model:
 8 |     def create_model(self, max_words, embedding_dim, maxlen, n_class=2):
 9 |         model = Sequential()
10 |         #  embedding layer
11 |         model.add(Embedding(max_words, embedding_dim, input_length=maxlen))
12 |         # GRU
13 |         model.add(GRU(units=32, return_sequences=True))
14 |         model.add(GRU(units=16, return_sequences=False))
15 |         model.add(Dense(512, activation='relu'))
16 |         model.add(Dropout(0.5))
17 |         model.add(Dense(n_class,  activation='softmax'))
18 |         model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])
19 |         return model
20 | 


--------------------------------------------------------------------------------
/litNlp/model_structure/Model_TextCNN.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time: 2020/6/20 0020 0:46
 3 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
 4 | from tensorflow.keras.layers import Dense, Embedding, Flatten,Dropout,Convolution1D,BatchNormalization
 5 | from tensorflow.keras.preprocessing.sequence import pad_sequences
 6 | from tensorflow.keras.optimizers import Adam,RMSprop
 7 | from tensorflow.keras.models import Sequential, Model
 8 | from sklearn.model_selection import train_test_split
 9 | from tensorflow.keras.preprocessing.text import Tokenizer
10 | from tensorflow.keras.layers import concatenate,GlobalAveragePooling1D,Activation,MaxPool1D,Input
11 | class sa_model:
12 |     def create_model(self, max_words, embedding_dim, maxlen, n_class=2):
13 |         # 使用model模式
14 |         main_input = Input(shape=(maxlen,), dtype='float64')
15 |         embedder = Embedding(max_words + 1, embedding_dim, input_length=maxlen)
16 |         embed = embedder(main_input)
17 |         # 3,4,5 windows
18 |         cnn1 = Convolution1D(256, 3, padding='same', strides=1, activation='relu')(embed)
19 |         cnn1 = MaxPool1D(pool_size=4)(cnn1)
20 |         cnn2 = Convolution1D(256, 4, padding='same', strides=1, activation='relu')(embed)
21 |         cnn2 = MaxPool1D(pool_size=4)(cnn2)
22 |         cnn3 = Convolution1D(256, 5, padding='same', strides=1, activation='relu')(embed)
23 |         cnn3 = MaxPool1D(pool_size=4)(cnn3)
24 |         # concat
25 |         cnn = concatenate([cnn1, cnn2, cnn3], axis=-1)
26 |         flat = Flatten()(cnn)
27 |         drop = Dropout(0.5)(flat)
28 |         main_output = Dense(n_class, activation='softmax')(drop)
29 |         model = Model(inputs=main_input, outputs=main_output)
30 |         model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])
31 |         return model
32 | 


--------------------------------------------------------------------------------
/litNlp/model_structure/TextCNN.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time: 2020/6/20 0020 10:38
 3 | from tensorflow.keras.layers import Dense, Embedding, Flatten,Dropout,Convolution1D,BatchNormalization
 4 | from tensorflow.keras.optimizers import Adam,RMSprop
 5 | from tensorflow.keras.models import Sequential, Model
 6 | from tensorflow.keras.layers import concatenate,GlobalAveragePooling1D,Activation,MaxPooling1D,Input
 7 | class sa_model:
 8 |     def create_model(self, max_words, embedding_dim, maxlen):
 9 |         model = Sequential()
10 |         #  embedding layer
11 |         model.add(Embedding(max_words, embedding_dim, input_length=maxlen))
12 |         model.add(Convolution1D(64, 3, input_shape=(-1, embedding_dim)))
13 |         model.add(Activation('relu'))
14 |         model.add(MaxPooling1D(2, 2))
15 |         model.add(Flatten())
16 |         model.add(Dense(512, activation='relu'))
17 |         model.add(Dropout(0.5))
18 |         model.add(Dense(1,  activation='sigmoid'))
19 |         model.compile(loss='binary_crossentropy',optimizer=Adam(lr=1e-3), metrics=['accuracy'])
20 |         return model
21 | 


--------------------------------------------------------------------------------
/litNlp/model_structure/TextCNN_m.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time: 2020/6/20 0020 10:38
 3 | from tensorflow.keras.layers import Dense, Embedding, Flatten,Dropout,Convolution1D,BatchNormalization
 4 | from tensorflow.keras.optimizers import Adam,RMSprop
 5 | from tensorflow.keras.models import Sequential, Model
 6 | from tensorflow.keras.layers import GlobalAveragePooling1D,Activation,MaxPooling1D,Input
 7 | class TextCNN_m:
 8 |     def create_model(self,max_words,embedding_dim, maxlen, n_class=2):
 9 |         model = Sequential()
10 |         #  embedding layer
11 |         model.add(Embedding(max_words, embedding_dim, input_length=maxlen))
12 |         model.add(Convolution1D(64, 3, input_shape=(-1, embedding_dim)))
13 |         model.add(Activation('relu'))
14 |         model.add(MaxPooling1D(2, 2))
15 |         model.add(Flatten())
16 |         model.add(Dense(512, activation='relu'))
17 |         model.add(Dropout(0.5))
18 |         model.add(Dense(n_class,  activation='softmax'))
19 |         model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])
20 |         return model
21 | 


--------------------------------------------------------------------------------
/litNlp/model_structure/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CarryChang/litNlp/53bc4df6168fcfdc109afbc4acd0e4838c6899dc/litNlp/model_structure/__init__.py


--------------------------------------------------------------------------------
/litNlp/predict.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time: 2020/6/20 0020 0:55
 3 | from tensorflow.keras.preprocessing.sequence import pad_sequences
 4 | from tensorflow.keras.models import load_model
 5 | import pickle
 6 | class SA_Model_Predict:
 7 |     def __init__(self,tokenize_path, sa_model_path_m, max_len=100):
 8 |         with open(tokenize_path, 'rb') as tokenize_save:
 9 |             self.tokenizer_load = pickle.load(tokenize_save)
10 |         self.max_len = max_len
11 |         self.sa_model_path_m = sa_model_path_m
12 |     def predict(self,predict_text):
13 |         tk_list = [list(text) for text in predict_text]
14 |         test_text = pad_sequences(self.tokenizer_load.texts_to_sequences(tk_list), self.max_len)
15 |         model_load = load_model(self.sa_model_path_m)
16 |         test_proba_list = model_load.predict(test_text)
17 |         return test_proba_list
18 | # if __name__ == '__main__':
19 | #     # 内置参数，批处理文本
20 | #     predict_text = ['这个我不喜欢', '这个我喜欢不']
21 | #     # 初始化模型
22 | #     model = SA_Model_Predict(tokenize_path=tokenizer_path, sa_model_path_m=sa_model_path)
23 | #     sa_score = model.predict(predict_text)
24 | #     # 多分类模型输出
25 | #     print([i[1] for i in sa_score])


--------------------------------------------------------------------------------
/litNlp/train.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time: 2020/6/20 0020 0:46
 3 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
 4 | from tensorflow.keras.preprocessing.sequence import pad_sequences
 5 | from sklearn.model_selection import train_test_split
 6 | from tensorflow.keras.preprocessing.text import Tokenizer
 7 | from tensorflow.keras.utils import to_categorical
 8 | from .model_structure.BiLSTM import BILSTM_Model
 9 | from .model_structure.GRU import GRU_Model
10 | from .model_structure.TextCNN_m import TextCNN_m
11 | from sklearn import metrics
12 | import numpy as np
13 | import pickle
14 | class SA_Model_Train:
15 |     def __init__(self, max_words, embedding_dim, maxlen, tokenize_path, sa_model_path_m, train_method=''):
16 |         if train_method == 'gru':
17 |             self.init_model = GRU_Model()
18 |         elif train_method == 'bilstm':
19 |             self.init_model = BILSTM_Model()
20 |         else:
21 |             # 默认textcnn
22 |             self.init_model = TextCNN_m()
23 |         self.max_words = max_words
24 |         self.tokenize_path = tokenize_path
25 |         self.embedding_dim = embedding_dim
26 |         self.maxlen = maxlen
27 |         self.sa_model_path_m = sa_model_path_m
28 |         self.model = self.init_model.create_model(self.max_words, self.embedding_dim, self.maxlen)
29 |     def train_tk(self,train_data):
30 |         tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', num_words=self.max_words)
31 |         tokenizer.fit_on_texts(train_data)
32 |         with open(self.tokenize_path, 'wb') as tokenize:
33 |             pickle.dump(tokenizer, tokenize)
34 |         return tokenizer
35 |     def train(self,train_data,label,num_classes,batch_size=256,epochs=10,verbose=1,evaluate=True):
36 |         # to_categorical
37 |         targets_values = to_categorical(label, num_classes=num_classes)
38 |         # data split
39 |         x_train, y_train, x_test, y_test = train_test_split(train_data, targets_values, test_size=0.2, random_state=1)
40 |         # pad_sequences
41 |         tokenizer = self.train_tk(train_data)
42 |         x_train, x_test = pad_sequences(tokenizer.texts_to_sequences(x_train), self.maxlen), np.array(x_test)
43 |         y_train, y_test = pad_sequences(tokenizer.texts_to_sequences(y_train), self.maxlen), np.array(y_test)
44 |         self.model.fit(x_train, x_test, batch_size, epochs, verbose,
45 |                        validation_split=0.2, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
46 |         self.model.save(self.sa_model_path_m)
47 |         try:
48 |             pre_result = self.model.predict(y_train, batch_size=256, verbose=0)
49 |         except:
50 |             result_ = self.model.predict_classes(y_train, batch_size=256, verbose=0)
51 |             pre_result = np.argmax(result_, axis=1)
52 |         if evaluate:
53 |             result = [np.argmax(i) for i in pre_result]
54 |             y_test = [np.argmax(i) for i in y_test]
55 |             report = metrics.classification_report(y_test, result)
56 |             acc = metrics.accuracy_score(y_test, result)
57 |             auc = metrics.roc_auc_score(y_test, result)
58 |             print(report)
59 |             print('acc:  {}    auc:  {}'.format(acc, auc))
60 | # if __name__ == '__main__':
61 | #     # C-CNN-SA(字符级卷积网络)
62 | #     train_data = pd.read_csv('data/sa_data_train.csv')
63 | #     # list sentence
64 | #     train_data['text_cut'] = train_data['text'].apply(lambda x: " ".join(list(x)))
65 | #     model = SA()
66 | #     tokenizer = model.train_tk()
67 | #     # 2-8的分割数据,固定测试数据
68 | #     targets_values = to_categorical(train_data['label'], num_classes=num_classes)
69 | #     x_train, y_train, x_test, y_test = train_test_split(train_data['text_cut'],targets_values, test_size=0.2, random_state=1)
70 | #     # pad_sequences
71 | #     x_train, x_test = pad_sequences(tokenizer.texts_to_sequences(x_train), maxlen), np.array(x_test)
72 | #     y_train, y_test = pad_sequences(tokenizer.texts_to_sequences(y_train), maxlen), np.array(y_test)
73 | #     # train
74 | #     pre_result = model.train(x_train, x_test)
75 | #     # evaluate
76 | #     model.evaluate(pre_result, y_test)
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 


--------------------------------------------------------------------------------
/pic/auc_2poch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CarryChang/litNlp/53bc4df6168fcfdc109afbc4acd0e4838c6899dc/pic/auc_2poch.png


--------------------------------------------------------------------------------
/pic/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CarryChang/litNlp/53bc4df6168fcfdc109afbc4acd0e4838c6899dc/pic/logo.png


--------------------------------------------------------------------------------
/pic/server.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CarryChang/litNlp/53bc4df6168fcfdc109afbc4acd0e4838c6899dc/pic/server.png


--------------------------------------------------------------------------------
/pic/tools.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CarryChang/litNlp/53bc4df6168fcfdc109afbc4acd0e4838c6899dc/pic/tools.png


--------------------------------------------------------------------------------
/pic/ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CarryChang/litNlp/53bc4df6168fcfdc109afbc4acd0e4838c6899dc/pic/ui.png


--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | tensorflow>=2.0.1
2 | streamlit
3 | docker-compose
4 | scikit-learn
5 | pandas
6 | numpy
7 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import setuptools
 3 | import io
 4 | import os
 5 | import platform
 6 | import subprocess
 7 | import sys
 8 | with io.open('README.md', 'r', encoding='utf-8') as f:
 9 |     long_description = f.read()
10 | REQUIRED_PACKAGES = [
11 |     'h5py', 'requests'
12 | ]
13 | setuptools.setup(
14 |   name="litNlp",
15 |   version="0.8.5",
16 |   packages=['litNlp', 'litNlp.model_structure'],
17 |   author="CarryChang",
18 |   author_email="coolcahng@gmail.com",
19 |   url='https://github.com/CarryChang/litNlp',
20 |   license='https://www.apache.org/licenses/LICENSE-2.0',
21 |   include_package_data=True,
22 |   description='A fast tool for sentiment analysis model with tensorflow2.0 ',
23 |   # long_description='litNlp 是基于 Tensorflow2.0 实现的一个轻量级的深度情感极性推理模型，可以实现细粒度的多级别情感极性训练和预测。 GPU 和 CPU 平台通用，是搭建 NLP 分类模型类 baseline 的快速方案。'
24 |   long_description=long_description,
25 |   long_description_content_type='text/markdown',
26 |   install_requires=REQUIRED_PACKAGES,
27 |   python_requires=">=3.5",
28 |   zip_safe=True,
29 |   classifiers=(
30 |         "License :: OSI Approved :: Apache Software License",
31 |         "Operating System :: OS Independent",
32 |         'Intended Audience :: Developers',
33 |         'Intended Audience :: Education',
34 |         'Intended Audience :: Science/Research',
35 |         'Programming Language :: Python :: 3.5',
36 |         'Programming Language :: Python :: 3.6',
37 |         'Programming Language :: Python :: 3.7',
38 |         'Topic :: Scientific/Engineering',
39 |         'Topic :: Software Development',
40 |         'Topic :: Software Development :: Libraries',
41 |         'Topic :: Software Development :: Libraries :: Python Modules',
42 |     ),
43 |   extras_require={
44 |         "cpu": ["tensorflow>=2.0.1"],
45 |         "gpu": ["tensorflow-gpu>=2.0.1"],
46 |     },
47 |   entry_points={
48 |     },
49 |   keywords=['text classification', 'nlp','batch predict',
50 |               'deep learning', 'tensorflow', 'ml',],
51 | )


--------------------------------------------------------------------------------