├── .gitignore
├── LICENSE
├── README.md
├── calenda.py
├── console.py
├── data_process.py
├── draw.py
├── getMsg.py
├── icon
    ├── icon.ico
    └── icon.png
├── logo
    ├── logo.png
    └── logo2.png
├── main.py
├── save.py
├── show_gui.py
├── solve.py
├── 仓耳与墨 W03.TTF
└── 使用教程
    ├── 使用教程.pdf
    └── 词云形状自定义教程.pdf


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 | /data/
162 | /聊天记录/
163 | /test.py
164 | /api/
165 | /橙子的聊天记录分析器.zip
166 | /用户数据/
167 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 OOrangeeee
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 微信聊天记录分析
  2 | 
  3 | >写在前面：本程序是本人（在读大二学生）利用课余时间三天内写完的送给我女朋友的小惊喜，碍于课业压力，时间不足和能力有限本程序还有很多漏洞和缺点，任何问题都可以联系我（联系方式写在代码头了），有时间我会更新的，还希望大佬勿喷。
  4 | 
  5 | **详细的使用教程都在"使用教程"文件夹中。**
  6 | 
  7 | ## 软件概述
  8 | 
  9 | >本软件专注于对已获取的微信聊天数据进行深入分析。
 10 | 为了使用本软件的数据分析功能，您首先需要通过专业人士开发的“留痕”软件来爬取微信聊天记录并导出为csv格式。最后的板块内我分享了留痕的一个版本供大家使用并且分享了其仓库链接，请大家多多支持大佬的工作。一旦完成了微信数据的爬取和解密，便可将这些数据导入到我们的软件中，进行全面而详细的分析。
 11 | 我的软件设计用以洞察和解读微信聊天记录，为用户提供深入的数据洞见。（特别适合情侣之间分析聊天记录）
 12 | 
 13 | ***注：如果想直接使用程序，请直接跳到最后，查看“运行程序”板块。爬取方式也在那里。生成的图片直接查看或许会失真（尤其是总体的热力图，plt的锅），请在文件夹中查看，是完全正常的。(图片的保存路径是./用户数据/data/src，用户数据文件夹就在exe的同级目录里)***
 14 | 
 15 | 本软件致力于保障用户隐私，所有源代码均可供查阅，确保用户信息及聊天记录的安全性和保密性。我们以橙子和柠檬的聊天数据为例，展现了本软件的数据分析能力。若需更改分析对象，用户可在源代码中搜索并替换相应的名字。本软件提供的数据分析功能包括：
 16 | 
 17 | 1. **聊天记录词频分析**：
 18 |    1. 对两人聊天记录中的高频词进行统计，生成三张精美的词云图（橙子一张，柠檬一张，以及二人共同的一张）。并且可以自定义词云形状。
 19 | 2. **Emoji使用分析**：
 20 |    1. 统计两人最常用的emoji，并通过柱状图进行比较展示。
 21 | 3. **表情包使用分析**：
 22 |    1. 分类统计两人使用的表情包种类，并以柱状图形式展示每人的种类数。
 23 |    2. 比较两人使用表情包的数量，并通过柱状图进行展示。
 24 |    3. 分析并对比两人使用不同表情包的频率，以柱状图形式展示。
 25 | 4. **聊天热度分析**：
 26 |    1. 生成每月的日历形式热力图，分别针对橙子和柠檬，以及他们共同的记录。
 27 |    2. 统计并展示同一时期内的聊天热度变化，以折线图形式展示。
 28 | 5. **聊天时间分析**：
 29 |    1. 分析一天24小时内两人的聊天活跃度，并生成相应的热力图（橙子一张，柠檬一张，以及共同的一张）。
 30 | 6. **聊天情感分析**：
 31 |    1. 利用百度智能云的语句情感分析API，对聊天记录进行情感分析，并生成三张饼图（橙子一张，柠檬一张，以及共同的一张）。
 32 | 
 33 | 所有分析结果和数据均可在根目录下的**用户数据**文件夹中查阅。这些功能旨在为用户提供深入、全面的聊天数据分析，帮助用户更好地理解和存储宝贵的交流信息。
 34 | 
 35 | ## 更新说明
 36 | 
 37 | **在2.0版本的重大更新后，程序现已实现全面优化，完美适配各类聊天记录。用户能够灵活地输入自己的姓名，并分析任意时间段的聊天内容，不再受限于之前的四个月时间限制。此外，程序还提供了便捷的情感分析功能选择，用户可根据需要轻松输入API相关信息进行分析。总的来说，经过全面升级的2.0版本，终于成为了一个真正优秀的产品。这一过程告诫我们，优化代码的重要性不容忽视，希望大家能从中汲取教训，千万不要把代码写死。**
 38 | 
 39 | **由于大家很喜欢本程序，故本人抽出时间更新了3.0版本。本版本完全实现程序的可视化（终于不再是该死的命令行了），并且大家可以自由选择词云的形状。**
 40 | 
 41 | ## 环境配置
 42 | 
 43 | 关于软件环境的配置，我有几点建议想与你分享。首先，我不建议你完全依赖导出的环境配置来搭建自己的系统。在实际操作中，**根据自己的需求逐步补充所缺少的组件往往是一个更为高效和灵活的方法**。原因很简单：每个人的需求和环境都是独一无二的，盲目跟随他人的配置可能并不适合你。
 44 | 
 45 | 至于为什么不提供一个自动化的环境配置程序，诚实地说，这主要是因为我有些懒惰。编写这样的程序需要花费大量的时间和精力，而且往往难以满足所有人的需求。
 46 | 
 47 | 如果你不想深入自定义名称、月份等参数（这需要修改代码，尽管只是几个参数，但前提是你需要有足够的专业知识来阅读和理解代码），你可以选择使用我预先打包好的程序。我已经尽可能地将其配置得简单易用，希望能够满足你的基本需求。
 48 | 
 49 | 感谢你的理解和支持！希望我的建议对你有所帮助。
 50 | 
 51 | ``` bash
 52 | # This file may be used to create an environment using:
 53 | # $ conda create --name <env> --file <this file>
 54 | # platform: win-64
 55 | altgraph=0.17.3=py39haa95532_0
 56 | blas=1.0=mkl
 57 | bottleneck=1.3.5=py39h080aedc_0
 58 | brotli=1.0.9=h2bbff1b_7
 59 | brotli-bin=1.0.9=h2bbff1b_7
 60 | ca-certificates=2023.12.12=haa95532_0
 61 | contourpy=1.2.0=py39h59b6b97_0
 62 | cycler=0.11.0=pyhd3eb1b0_0
 63 | et_xmlfile=1.1.0=py39haa95532_0
 64 | fonttools=4.25.0=pyhd3eb1b0_0
 65 | freetype=2.12.1=ha860e81_0
 66 | future=0.18.3=py39haa95532_0
 67 | giflib=5.2.1=h8cc25b3_3
 68 | icc_rt=2022.1.0=h6049295_2
 69 | icu=73.1=h6c2663c_0
 70 | importlib_resources=6.1.1=py39haa95532_1
 71 | intel-openmp=2023.1.0=h59b6b97_46320
 72 | joblib=1.2.0=py39haa95532_0
 73 | jpeg=9e=h2bbff1b_1
 74 | kiwisolver=1.4.4=py39hd77b12b_0
 75 | krb5=1.20.1=h5b6d351_0
 76 | lerc=3.0=hd77b12b_0
 77 | libbrotlicommon=1.0.9=h2bbff1b_7
 78 | libbrotlidec=1.0.9=h2bbff1b_7
 79 | libbrotlienc=1.0.9=h2bbff1b_7
 80 | libclang=14.0.6=default_hb5a9fac_1
 81 | libclang13=14.0.6=default_h8e68704_1
 82 | libdeflate=1.17=h2bbff1b_1
 83 | libpng=1.6.39=h8cc25b3_0
 84 | libpq=12.15=h906ac69_1
 85 | libtiff=4.5.1=hd77b12b_0
 86 | libwebp=1.3.2=hbc33d0d_0
 87 | libwebp-base=1.3.2=h2bbff1b_0
 88 | lz4-c=1.9.4=h2bbff1b_0
 89 | matplotlib=3.8.0=py39haa95532_0
 90 | matplotlib-base=3.8.0=py39h4ed8f06_0
 91 | mkl=2023.1.0=h6b88ed4_46358
 92 | mkl-service=2.4.0=py39h2bbff1b_1
 93 | mkl_fft=1.3.8=py39h2bbff1b_0
 94 | mkl_random=1.2.4=py39h59b6b97_0
 95 | munkres=1.1.4=py_0
 96 | numexpr=2.8.7=py39h2cd9be0_0
 97 | numpy=1.26.3=py39h055cbcc_0
 98 | numpy-base=1.26.3=py39h65a83cf_0
 99 | openjpeg=2.4.0=h4fc8c34_0
100 | openpyxl=3.0.10=py39h2bbff1b_0
101 | openssl=3.0.12=h2bbff1b_0
102 | packaging=23.1=py39haa95532_0
103 | pandas=1.2.4=pypi_0
104 | pefile=2022.5.30=py39haa95532_0
105 | pillow=10.0.1=py39h045eedc_0
106 | pip=23.3.1=py39haa95532_0
107 | ply=3.11=py39haa95532_0
108 | pyinstaller=5.13.2=py39h2bbff1b_0
109 | pyinstaller-hooks-contrib=2022.14=py39haa95532_0
110 | pyparsing=3.0.9=py39haa95532_0
111 | pyqt=5.15.10=py39hd77b12b_0
112 | pyqt5-sip=12.13.0=py39h2bbff1b_0
113 | python=3.9.18=h1aa4202_0
114 | python-dateutil=2.8.2=pyhd3eb1b0_0
115 | python-tzdata=2023.3=pyhd3eb1b0_0
116 | pytz=2023.3.post1=py39haa95532_0
117 | pywin32=305=py39h2bbff1b_0
118 | pywin32-ctypes=0.2.0=py39haa95532_1000
119 | qt-main=5.15.2=h19c9488_10
120 | scikit-learn=1.3.0=py39h4ed8f06_0
121 | scipy=1.11.4=py39h309d312_0
122 | seaborn=0.12.2=py39haa95532_0
123 | setuptools=68.2.2=py39haa95532_0
124 | sip=6.7.12=py39hd77b12b_0
125 | six=1.16.0=pyhd3eb1b0_1
126 | sqlite=3.41.2=h2bbff1b_0
127 | tbb=2021.8.0=h59b6b97_0
128 | threadpoolctl=2.2.0=pyh0d69192_0
129 | tk=8.6.12=h2bbff1b_0
130 | tomli=2.0.1=py39haa95532_0
131 | tornado=6.3.3=py39h2bbff1b_0
132 | tzdata=2023d=h04d1e81_0
133 | vc=14.2=h21ff451_1
134 | vs2015_runtime=14.27.29016=h5e58377_2
135 | wheel=0.41.2=py39haa95532_0
136 | wordcloud=1.9.2=py39h2bbff1b_0
137 | xz=5.4.5=h8cc25b3_0
138 | zipp=3.17.0=py39haa95532_0
139 | zlib=1.2.13=h8cc25b3_0
140 | zstd=1.5.5=hd43e919_0
141 | absl-py==0.15.0
142 | altgraph @ file:///C:/b/abs_f2edualeyv/croot/altgraph_1670426107695/work
143 | astunparse==1.6.3
144 | baidu-aip==4.16.13
145 | Bottleneck @ file:///C:/Windows/Temp/abs_3198ca53-903d-42fd-87b4-03e6d03a8381yfwsuve8/croots/recipe/bottleneck_1657175565403/work
146 | cachetools==5.3.2
147 | certifi==2023.11.17
148 | chardet==3.0.4
149 | charset-normalizer==3.3.2
150 | clang==5.0
151 | click==8.1.7
152 | colorama==0.4.6
153 | contourpy @ file:///C:/b/abs_853rfy8zse/croot/contourpy_1700583617587/work
154 | cycler @ file:///tmp/build/80754af9/cycler_1637851556182/work
155 | et-xmlfile==1.1.0
156 | filelock==3.9.0
157 | fire==0.5.0
158 | flatbuffers==1.12
159 | fonttools==4.25.0
160 | fsspec==2023.4.0
161 | future @ file:///C:/b/abs_3dcibf18zi/croot/future_1677599891380/work
162 | gast==0.4.0
163 | google-auth==2.25.2
164 | google-auth-oauthlib==1.2.0
165 | google-pasta==0.2.0
166 | grpcio==1.60.0
167 | h11==0.9.0
168 | h2==3.2.0
169 | h5py==3.1.0
170 | hpack==3.0.0
171 | hstspreload==2024.1.5
172 | httpcore==0.9.1
173 | httpx==0.13.3
174 | hyperframe==5.2.0
175 | icon-font-to-png==0.4.1
176 | idna==2.10
177 | importlib-metadata==7.0.0
178 | importlib-resources @ file:///C:/b/abs_d0dmp77t95/croot/importlib_resources-suite_1704281892795/work
179 | install==1.3.5
180 | jieba==0.42.1
181 | Jinja2==3.1.2
182 | joblib==1.3.2
183 | Keras-Preprocessing==1.1.2
184 | kiwisolver @ file:///C:/b/abs_88mdhvtahm/croot/kiwisolver_1672387921783/work
185 | libclang==16.0.6
186 | libretranslatepy==2.1.1
187 | lxml==5.1.0
188 | Markdown==3.5.1
189 | MarkupSafe==2.1.3
190 | matplotlib @ file:///C:/b/abs_e26vnvd5s1/croot/matplotlib-suite_1698692153288/work
191 | mkl-fft @ file:///C:/b/abs_19i1y8ykas/croot/mkl_fft_1695058226480/work
192 | mkl-random @ file:///C:/b/abs_edwkj1_o69/croot/mkl_random_1695059866750/work
193 | mkl-service==2.4.0
194 | mpmath==1.3.0
195 | munkres==1.1.4
196 | networkx==3.0
197 | numexpr @ file:///C:/b/abs_5fucrty5dc/croot/numexpr_1696515448831/work
198 | numpy @ file:///C:/b/abs_16b2j7ad8n/croot/numpy_and_numpy_base_1704311752418/work/dist/numpy-1.26.3-cp39-cp39-win_amd64.whl#sha256=02e606e23ca31bb00a40d147fd1ce4dd7d241395346a4196592d5abe54a333bc
199 | oauthlib==3.2.2
200 | openpyxl==3.0.10
201 | opt-einsum==3.3.0
202 | packaging @ file:///C:/b/abs_28t5mcoltc/croot/packaging_1693575224052/work
203 | palettable==3.3.3
204 | pandas==1.2.4
205 | pefile @ file:///C:/b/abs_feg_7trsni/croot/pefile_1670877329726/work
206 | Pillow==9.3.0
207 | ply==3.11
208 | pyasn1==0.5.1
209 | pyasn1-modules==0.3.0
210 | pyinstaller @ file:///C:/b/abs_b94gi_3vjm/croot/pyinstaller_1703109616045/work
211 | pyinstaller-hooks-contrib @ file:///C:/b/abs_c2hemrb3nh/croot/pyinstaller-hooks-contrib_1670877320457/work
212 | pyparsing @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_7f_7lba6rl/croots/recipe/pyparsing_1661452540662/work
213 | PyQt5==5.15.10
214 | PyQt5-sip @ file:///C:/b/abs_c0pi2mimq3/croot/pyqt-split_1698769125270/work/pyqt_sip
215 | python-dateutil==2.8.2
216 | pytz==2023.3.post1
217 | pywin32==305.1
218 | pywin32-ctypes @ file:///C:/ci/pywin32-ctypes_1607553594546/work
219 | requests==2.31.0
220 | requests-oauthlib==1.3.1
221 | rfc3986==1.5.0
222 | rsa==4.9
223 | scikit-learn==1.3.2
224 | scipy==1.11.4
225 | seaborn @ file:///C:/b/abs_68ltdkoyoo/croot/seaborn_1673479199997/work
226 | sip @ file:///C:/b/abs_edevan3fce/croot/sip_1698675983372/work
227 | six==1.15.0
228 | sniffio==1.3.0
229 | stylecloud==0.5.2
230 | sympy==1.12
231 | tensorboard==2.15.1
232 | tensorboard-data-server==0.7.2
233 | tensorflow-estimator==2.15.0
234 | tensorflow-gpu==2.6.0
235 | tensorflow-io-gcs-filesystem==0.31.0
236 | termcolor==1.1.0
237 | threadpoolctl==3.2.0
238 | tinycss==0.4
239 | tomli @ file:///C:/Windows/TEMP/abs_ac109f85-a7b3-4b4d-bcfd-52622eceddf0hy332ojo/croots/recipe/tomli_1657175513137/work
240 | torch==2.1.2+cu118
241 | torchaudio==2.1.2+cu118
242 | torchvision==0.16.2+cu118
243 | tornado @ file:///C:/b/abs_0cbrstidzg/croot/tornado_1696937003724/work
244 | translate==3.6.1
245 | typing-extensions==3.7.4.3
246 | tzdata==2023.3
247 | urllib3==2.1.0
248 | Werkzeug==3.0.1
249 | wordcloud @ file:///C:/b/abs_66ccn47hik/croot/wordcloud_1687301655958/work
250 | wrapt==1.12.1
251 | zipp==3.17.0
252 | ```
253 | 
254 | 如需配置环境可参考上方文档。
255 | 
256 | ## 运行代码
257 | 
258 | clone后在clone的文件夹下打开cmd，输入以下代码：
259 | 
260 | ```bash
261 | python main.py
263 | ```
264 | 
265 | 随后根据软件提示使用即可。
266 | 
267 | ## 运行程序
268 | 
269 | 如果有不懂代码的朋友想使用此软件，本软件也贴心的准备好了打包好的程序，只需要前往此链接，下载压缩包后解压根目录下的**橙子的聊天记录分析器**压缩包，进入**橙子的聊天记录分析器**文件夹中启动**橙子的聊天记录分析器.exe**即可。
270 | 
271 | **请注意，此百度网盘链接里还有完整的使用教程，大家可以阅读一下**
272 | 
273 | 为了帮助大家使用，其中还包含了大佬的爬取微信聊天记录的软件（留痕），请大家多多支持大佬的工作。
274 | 
275 | ```bash
276 | 链接：https://pan.baidu.com/s/1zRRj7sT3snTyQsQFfwD_RA?pwd=0714 
277 | 提取码：0714 
278 | --来自百度网盘超级会员V6的分享
279 | ```
280 | 
281 | 欢迎大家使用此软件分析聊天记录。
282 | 
283 | 另外为了方便大家爬取解密微信聊天记录在此附上大佬的git仓库，用于爬取数据：
284 | 
285 | ```bash
286 | https://github.com/LC044/WeChatMsg
287 | ```
288 | 
289 | 2024.1.20 橙子
290 | 
291 | >2024.1.25更新
292 | >2024.1.26更新
293 | >2024.1.27更新
294 | >2024.1.30更新
295 | >2024.2.3更新
296 | >2024.2.7更新


--------------------------------------------------------------------------------
/calenda.py:
--------------------------------------------------------------------------------
 1 | # 最后编辑：
 2 | # 晋晨曦 2024.1.26 15.46
 3 | # qq：2950171570
 4 | # email：Jin0714@outlook.com  回复随缘
 5 | from calendar import monthrange
 6 | from numpy import ones
 7 | 
 8 | 
 9 | def generate_calendar(year, month):
10 |     """
11 |     返回日历样式的numpy数组
12 |     :param year: 年份
13 |     :param month: 月份
14 |     :return: numpy数组
15 |     """
16 |     first_day_weekday, month_days = monthrange(year, month)
17 |     rows_needed = ((first_day_weekday + month_days - 1) // 7) + 1
18 |     calendar_array = ones((rows_needed, 7), dtype=int)
19 |     day_counter = 1
20 | 
21 |     for week in range(rows_needed):
22 |         for day in range(7):
23 |             if week > 0 or day >= first_day_weekday:
24 |                 if day_counter <= month_days:
25 |                     calendar_array[week][day] = 0
26 |                 day_counter += 1
27 | 
28 |     return calendar_array
29 | 
30 | 
def get_month_dates(year1, month1, year2, month2):
    """
    Return the first and last date covered by a range of months.

    :param year1: start year
    :param month1: start month
    :param year2: end year
    :param month2: end month
    :return: tuple (first_date, last_date) of "YYYY-MM-DD" strings, running
             from day 01 of the start month to the last day of the end month
    """
    start_month = str(month1).zfill(2)
    end_month = str(month2).zfill(2)
    # monthrange returns (weekday of day 1, number of days); only the
    # number of days is needed here.
    _, end_day = monthrange(year2, month2)
    return f"{year1}-{start_month}-01", f"{year2}-{end_month}-{end_day}"
46 | 


--------------------------------------------------------------------------------
/console.py:
--------------------------------------------------------------------------------
 1 | from ctypes import WinDLL
 2 | 
 3 | 
 4 | def no_con():
 5 |     kernel32 = WinDLL("kernel32")
 6 |     user32 = WinDLL("user32")
 7 |     HWND = kernel32.GetConsoleWindow()
 8 |     if HWND != 0:
 9 |         user32.ShowWindow(HWND, 0)
10 | 


--------------------------------------------------------------------------------
/data_process.py:
--------------------------------------------------------------------------------
 1 | # 最后编辑：
 2 | # 晋晨曦 2024.1.26 15:46
 3 | # qq：2950171570
 4 | # email：Jin0714@outlook.com  回复随缘
 5 | from re import sub
 6 | 
 7 | 
 8 | def process_data(df):
 9 |     """
10 |     处理数据
11 |     :param df:原始数据
12 |     :return:处理好的数据
13 |     """
14 | 
15 |     # 提取数据
16 |     columns = ["StrTime", "StrContent", "IsSender"]
17 |     df = df[columns].copy()
18 |     df.rename(columns={"StrTime": "time", "StrContent": "data"}, inplace=True)
19 | 
20 |     # 清洗数据
21 |     df = df.dropna(subset=["data"])
22 |     df_x = df.copy()
23 |     df["data"] = df["data"].astype(str)
24 | 
25 |     # 分类数据
26 |     is_sender_1 = df["IsSender"] == 1
27 |     is_sender_0 = df["IsSender"] == 0
28 |     j_df = df[is_sender_1].copy()
29 |     n_df = df[is_sender_0].copy()
30 |     j_df = j_df.drop("IsSender", axis=1)
31 |     n_df = n_df.drop("IsSender", axis=1)
32 |     df_x = df_x.drop("IsSender", axis=1)
33 | 
34 |     df_x = df_x[df_x["data"].apply(not_start_with_msg)]
35 |     df_x["data"] = df_x["data"].apply(remove_bracketed_text_and_count_all)
36 |     df_x = df_x[df_x["data"].apply(len) > 0]
37 | 
38 |     j_df["time"] = j_df["time"].str.replace("/", "-")
39 |     n_df["time"] = n_df["time"].str.replace("/", "-")
40 |     df_x["time"] = df_x["time"].str.replace("/", "-")
41 | 
42 |     j_df = j_df.reset_index(drop=True)
43 |     n_df = n_df.reset_index(drop=True)
44 |     df_x = df_x.reset_index(drop=True)
45 | 
46 |     return j_df, n_df, df_x
47 | 
48 | 
def not_start_with_msg(value):
    """
    Tell whether a text does NOT begin with "<".

    :param value: text
    :return: True when the text does not start with "<", otherwise False
    """
    if value.startswith("<"):
        return False
    return True
56 | 
57 | 
def remove_bracketed_text_and_count_all(s):
    """
    Remove every "[...]" fragment from a chat message.

    :param s: message text
    :return: the text with all bracketed fragments removed
    """
    # Non-greedy, so each "[" pairs with the nearest following "]".
    bracketed_fragment = r"\[.*?\]"
    cleaned = sub(bracketed_fragment, "", s)
    return cleaned
65 | 


--------------------------------------------------------------------------------
/draw.py:
--------------------------------------------------------------------------------
  1 | # 最后编辑：
  2 | # 晋晨曦 2024.2.2 17.13
  3 | # qq：2950171570
  4 | # email：Jin0714@outlook.com  回复随缘
  5 | import matplotlib.pyplot as plt
  6 | from pandas import Series
  7 | from stylecloud import gen_stylecloud
  8 | from os import path
  9 | from os import remove
 10 | from seaborn import heatmap
 11 | from math import ceil
 12 | 
 13 | 
 14 | class draw_data:
 15 |     def __init__(self, name1, name2):
 16 |         """
 17 |         构造函数，初始化一些可能需要的属性。
 18 |         """
 19 |         self.name1 = name1
 20 |         self.name2 = name2
 21 |         pass
 22 | 
 23 |     def __str__(self):
 24 |         """
 25 |         字符串表示，用于打印对象时提供有用的信息。
 26 |         """
 27 |         return "draw_data类实例，用于可视化数据"
 28 |         pass
 29 | 
 30 |     # 绘制emoji
 31 | 
 32 |     def split_dict(self, original_dict, size):
 33 |         """
 34 |         划分字典
 35 |         :param original_dict: 初始字典
 36 |         :param size: 划分数量
 37 |         :return: 一个列表，包含划分后的字典
 38 |         """
 39 |         keys = list(original_dict.keys())
 40 |         split_dicts = []
 41 | 
 42 |         for i in range(0, len(keys), size):
 43 |             subset_keys = keys[i : i + size]
 44 |             new_dict = {key: original_dict[key] for key in subset_keys}
 45 |             split_dicts.append(new_dict)
 46 | 
 47 |         return split_dicts
 48 | 
 49 |     def draw_emoji(self, dict1, dict2, max_count):
 50 |         """
 51 |         批量画emoji图的驱动函数
 52 |         :param dict1:name1emoji字典
 53 |         :param dict2:name2emoji字典
 54 |         :return:无
 55 |         """
 56 |         length = len(dict1)
 57 |         num_20 = length // 20
 58 |         if length % 20 != 0:
 59 |             num_20 += 1
 60 |         j = self.split_dict(dict1, 20)
 61 |         n = self.split_dict(dict2, 20)
 62 |         for x in range(num_20):
 63 |             self.draw_emoji_tool(j[x], n[x], x, max_count)
 64 | 
 65 |     def draw_emoji_tool(self, dict1, dict2, num, max_count):
 66 |         """
 67 |         画emoji图的工作函数
 68 |         :param dict1:name1emoji字典
 69 |         :param dict2:name2emoji字典
 70 |         :param num:第几个图
 71 |         :return:无
 72 |         """
 73 |         # 提取键和值
 74 | 
 75 |         keys = list(dict1.keys())
 76 |         values1 = list(dict1.values())
 77 |         values2 = list(dict2.values())
 78 | 
 79 |         plt.figure(figsize=(10, 6))
 80 | 
 81 |         x = range(len(keys))
 82 |         width = 0.35
 83 | 
 84 |         plt.ylim(0, max_count)
 85 |         plt.bar(
 86 |             [i - width / 2 for i in x],
 87 |             values1,
 88 |             width=width,
 89 |             label=self.name1,
 90 |             color="orange",
 91 |             edgecolor="black",
 92 |         )
 93 |         plt.bar(
 94 |             [i + width / 2 for i in x],
 95 |             values2,
 96 |             width=width,
 97 |             label=self.name2,
 98 |             color="yellow",
 99 |             edgecolor="black",
100 |         )
101 | 
102 |         title = "emoji统计！num " + str(num + 1) + " !"
103 |         plt.xlabel("emoji类型")
104 |         plt.ylabel("频率")
105 |         plt.title(title)
106 |         plt.xticks(x, keys)
107 |         plt.legend()
108 | 
109 |         filepath = "./用户数据/data/src/emoji/" + title + ".png"
110 |         plt.savefig(filepath, format="png")
111 | 
112 |         plt.tight_layout()
113 | 
114 |     # 绘制表情包
115 | 
116 |     def union_bqb(self, j_df, n_df):
117 |         """
118 |         扩写df
119 |         :param j_df: 晋晨曦df
120 |         :param n_df: 宁静df
121 |         :return: 返回结果
122 |         """
123 |         data_union = Series(list(set(j_df["data"]).union(set(n_df["data"]))))
124 | 
125 |         df1_extended = data_union.to_frame(name="data").merge(
126 |             j_df, on="data", how="left"
127 |         )
128 |         df2_extended = data_union.to_frame(name="data").merge(
129 |             n_df, on="data", how="left"
130 |         )
131 | 
132 |         df1_extended["count"].fillna(0, inplace=True)
133 |         df2_extended["count"].fillna(0, inplace=True)
134 | 
135 |         df1_extended.sort_values(by="data", inplace=True)
136 |         df2_extended.sort_values(by="data", inplace=True)
137 |         return df1_extended, df2_extended
138 | 
139 |     def split_dataframe(self, df, n_parts):
140 |         """
141 |         划分表情包
142 |         :param df:划分对象
143 |         :param n_parts:划分几部分
144 |         :return:划分后的df列表
145 |         """
146 |         part_size = len(df) // n_parts
147 | 
148 |         split_dfs = [df[i * part_size : (i + 1) * part_size] for i in range(n_parts)]
149 | 
150 |         return split_dfs
151 | 
152 |     def draw_bqb(self, bqb_j, bqb_n, max_count):
153 |         """
154 |         画表情包的图
155 |         :param bqb_j: name1表情包df
156 |         :param bqb_n: name2表情包df
157 |         :return: 无
158 |         """
159 |         # 画种类
160 |         self.draw_bqb_kinds(bqb_j, bqb_n)
161 |         # 画数量
162 |         self.draw_bqb_count(bqb_j, bqb_n, max_count)
163 |         # 细分画
164 |         bqb_j, bqb_n = self.union_bqb(bqb_j, bqb_n)
165 |         count = len(bqb_n) // 28
166 |         if len(bqb_n) % 28 != 0:
167 |             count += 1
168 |         dfs_j = self.split_dataframe(bqb_j, count)
169 |         dfs_l = self.split_dataframe(bqb_n, count)
170 |         for i in range(count):
171 |             self.draw_bqb_details(dfs_j[i], dfs_l[i], i, max_count)
172 | 
173 |     def draw_bqb_kinds(self, df1, df2):
174 |         """
175 |         画图表情包种类
176 |         :param df1:name1df
177 |         :param df2:name2df
178 |         :return:无
179 |         """
180 |         unique_count_df1 = df1["data"].nunique()
181 |         unique_count_df2 = df2["data"].nunique()
182 | 
183 |         labels = [self.name1, self.name2]
184 |         counts = [unique_count_df1, unique_count_df2]
185 | 
186 |         plt.figure(figsize=(10, 6))
187 | 
188 |         plt.bar(labels, counts, color=["orange", "yellow"], edgecolor="black")
189 | 
190 |         title = "表情包种数统计!"
191 |         plt.title(title)
192 |         plt.xlabel("对象")
193 |         plt.ylabel("用的表情包种数")
194 |         filepath = "./用户数据/data/src/表情包/" + title + ".png"
195 |         plt.savefig(filepath, format="png")
196 | 
197 |     def draw_bqb_count(self, df1, df2, max_count):
198 |         """
199 |         表情包数目
200 |         :param df1:name1数目
201 |         :param df2:name2数目
202 |         :return:
203 |         """
204 |         total_count_df1 = df1["count"].sum()
205 |         total_count_df2 = df2["count"].sum()
206 | 
207 |         labels = [self.name1, self.name2]
208 |         counts = [total_count_df1, total_count_df2]
209 | 
210 |         plt.figure(figsize=(10, 6))
211 | 
212 |         plt.bar(labels, counts, color=["orange", "yellow"], edgecolor="black")
213 | 
214 |         title = "表情包数量统计!"
215 |         plt.title(title)
216 |         plt.xlabel("对象")
217 |         plt.ylabel("用的表情包数量")
218 |         filepath = "./用户数据/data/src/表情包/" + title + ".png"
219 |         plt.savefig(filepath, format="png")
220 | 
221 |     def draw_bqb_details(self, df1, df2, num, max_count):
222 |         """
223 |         画表情包图
224 |         :param df1: name1df子集
225 |         :param df2: name2df子集
226 |         :param num: 第几个
227 |         :return: 无
228 |         """
229 |         index_data = list(df1["data"])
230 | 
231 |         counts_df1 = {
232 |             data: df1[df1["data"] == data]["count"].sum() for data in index_data
233 |         }
234 |         counts_df2 = {
235 |             data: df2[df2["data"] == data]["count"].sum() for data in index_data
236 |         }
237 | 
238 |         x_labels = range(1, len(index_data) + 1)
239 | 
240 |         counts1 = [counts_df1.get(data, 0) for data in index_data]
241 |         counts2 = [counts_df2.get(data, 0) for data in index_data]
242 | 
243 |         plt.figure(figsize=(20, 6))
244 | 
245 |         plt.ylim(0, max_count)
246 | 
247 |         plt.bar(
248 |             [x + 0.05 for x in x_labels],
249 |             counts1,
250 |             color="orange",
251 |             width=0.3,
252 |             label=self.name1,
253 |             edgecolor="black",
254 |         )
255 |         plt.bar(
256 |             [x + 0.35 for x in x_labels],
257 |             counts2,
258 |             color="yellow",
259 |             width=0.3,
260 |             label=self.name2,
261 |             edgecolor="black",
262 |         )
263 | 
264 |         title = "不同表情包使用频率 num " + str(num + 1) + " !"
265 |         plt.title(title)
266 |         plt.xlabel("表情包编号")
267 |         plt.ylabel("频率")
268 | 
269 |         plt.xticks([x + 0.2 for x in x_labels], [str(x) for x in x_labels])
270 | 
271 |         plt.legend()
272 | 
273 |         filepath = "./用户数据/data/src/表情包/" + title + ".png"
274 |         plt.savefig(filepath, format="png")
275 | 
276 |     # 绘制词云
277 | 
278 |     def draw_word_cloud(self, df, shape, mode):
279 |         """
280 |         画词云
281 |         :param df: 数据
282 |         :param shape: 形状
283 |         :param mode: 模式
284 |         :return: 无
285 |         """
286 |         if len(df) >= 200:
287 |             df_using = df.head(200)
288 |         else:
289 |             df_using = df
290 |         f_path = "temp.csv"
291 |         df_using.to_csv(f_path, index=False)
292 |         output_path = "./用户数据/data/src/word/" + mode + "词云.png"
293 |         gen_stylecloud(
294 |             file_path=f_path,
295 |             size=1920,
296 |             icon_name=shape,
297 |             palette="colorbrewer.diverging.Spectral_11",
298 |             background_color="black",
299 |             max_words=len(df_using),
300 |             max_font_size=120,
301 |             font_path="仓耳与墨 W03.TTF",
302 |             output_name=output_path,
303 |         )
304 |         if path.exists(f_path):
305 |             remove(f_path)
306 |         else:
307 |             print("完蛋")
308 | 
309 |     # 绘制热力图和变化趋势
310 | 
311 |     def draw_heatmap_all(self, rili_dfs, title, masks, length, months, max_count):
312 |         """
313 |         统调子图函数
314 |         :param rili_dfs: 日历df
315 |         :param title: 标题
316 |         :param masks: 遮罩
317 |         :return: 无
318 |         """
319 |         for i in range(0, length):
320 |             self.draw_heatmap_small(rili_dfs[i], title, masks[i], months[i], max_count)
321 |             pass
322 | 
323 |     def draw_heatmap_small(self, rili_df, title, mask, month, max_count):
324 |         """
325 |         画小图像
326 |         :param rili_df:日历
327 |         :param title:标题
328 |         :param mask:遮罩
329 |         :param month:月份
330 |         :return:
331 |         """
332 |         year = month[0]
333 |         time = month[1]
334 |         plt.figure(figsize=(5, 5))
335 |         plt.title(title + "的聊天热力图 " + str(year) + " " + str(time) + "月 版!!")
336 |         data = rili_df
337 |         data = data.fillna(0)
338 |         data = data.astype(int)
339 |         heatmap(
340 |             data=data,
341 |             mask=mask,
342 |             vmax=max_count,
343 |             vmin=0,
344 |             cmap="YlOrRd",
345 |             linewidths=0.5,
346 |             linecolor="white",
347 |             cbar=True,
348 |             cbar_kws={"label": "信息条数"},
349 |         )
350 |         counter = 1
351 |         for y in range(data.shape[0]):
352 |             for x in range(data.shape[1]):
353 |                 if not mask[y, x]:  # 如果格子未被遮罩
354 |                     plt.text(
355 |                         x + 0.5,
356 |                         y + 0.5,
357 |                         str(counter),
358 |                         ha="center",
359 |                         va="center",
360 |                         color="black",
361 |                     )
362 |                     counter += 1
363 |         plt.yticks([])
364 |         plt.tight_layout()
365 |         filepath = (
366 |             "./用户数据/data/src/热力图/"
367 |             + title
368 |             + "的聊天热力图 "
369 |             + str(year)
370 |             + str(time)
371 |             + "月 版!!.png"
372 |         )
373 |         plt.savefig(filepath, format="png")
374 | 
375 |     def draw_heatmap_big(self, rili_dfs, title, masks, length, months, max_count):
376 |         """
377 |         画热力图总览
378 |         :param rili_dfs: 日历
379 |         :param title: 标题
380 |         :param masks: 遮罩
381 |         :param length: 数量
382 |         :param months: 月份
383 |         :param max_count: 最大计数
384 |         """
385 |         rows = ceil(length / 4)
386 |         cols = 4
387 |         fig, axes = plt.subplots(rows, cols, figsize=(10, 5 * rows))
388 | 
389 |         if rows == 1 or cols == 1:
390 |             axes = axes.reshape(rows, cols)
391 | 
392 |         for i in range(rows * cols):
393 |             row = i // cols
394 |             col = i % cols
395 |             ax = axes[row, col]
396 | 
397 |             # 如果 i 小于 length，则绘制子图，否则隐藏该子图
398 |             if i < length:
399 |                 data = rili_dfs[i]
400 |                 data = data.fillna(0)
401 |                 data = data.astype(int)
402 |                 heatmap(
403 |                     data=data,
404 |                     mask=masks[i],
405 |                     vmax=max_count,
406 |                     vmin=0,
407 |                     cmap="YlOrRd",
408 |                     linewidths=0.5,
409 |                     linecolor="white",
410 |                     ax=ax,
411 |                     cbar=False,
412 |                 )
413 |                 counter = 1
414 |                 for y in range(data.shape[0]):
415 |                     for x in range(data.shape[1]):
416 |                         if not masks[i][y, x]:  # 如果格子未被遮罩
417 |                             ax.text(
418 |                                 x + 0.5,
419 |                                 y + 0.5,
420 |                                 str(counter),
421 |                                 ha="center",
422 |                                 va="center",
423 |                                 color="black",
424 |                             )
425 |                             counter += 1
426 |                 ax.set_title(f"{months[i][0]} 年 {months[i][1]} 月")
427 |                 ax.set_yticklabels([])
428 |                 ax.set_aspect("equal")
429 |             else:
430 |                 ax.axis("off")
431 | 
432 |         fig.text(0.5, 0.01, title + " 的聊天热力图总览！！", ha="center", fontsize=15)
433 |         cbar_ax = fig.add_axes([0.2, 0.90, 0.6, 0.07])
434 |         norm = plt.Normalize(vmin=0, vmax=max_count)
435 |         sm = plt.cm.ScalarMappable(cmap="YlOrRd", norm=norm)
436 |         fig.colorbar(sm, cax=cbar_ax, orientation="horizontal", label="信息条数")
437 |         plt.subplots_adjust(bottom=0.1)
438 |         filepath = "./用户数据/data/src/热力图/" + title + "聊天热力图总览.png"
439 |         plt.savefig(filepath, format="png")
440 | 
441 |     def draw_heat_how(self, df, title, max_count):
442 |         """
443 |         热度变化趋势
444 |         :param df: 数据
445 |         :param title: 标题
446 |         :return: 无
447 |         """
448 |         plt.figure(figsize=(15, 6))
449 |         plt.ylim(0, max_count)
450 |         plt.plot(df.index, df["counts"], marker="o")
451 | 
452 |         plt.title(title + "聊天热度变化趋势")
453 |         plt.xlabel("时间")
454 |         plt.ylabel("热度")
455 | 
456 |         # plt.grid(True)
457 |         filepath = "./用户数据/data/src/热力图/" + title + "聊天热度变化趋势.png"
458 |         plt.savefig(filepath, format="png")
459 | 
460 |     def draw_time_heat(self, time_df, title, max_count):
461 |         """
462 |         画时间热力图
463 |         :param time_df: 图数据
464 |         :param title: 标题
465 |         :return: 无
466 |         """
467 |         plt.figure(figsize=(10, 3))
468 |         plt.title(title + "的聊天时间分布热力图!!")
469 |         data = time_df
470 |         data = data.fillna(0)
471 |         data = data.astype(int)
472 |         heatmap(
473 |             data=data,
474 |             vmax=max_count,
475 |             vmin=0,
476 |             cmap="YlOrRd",
477 |             linewidths=0.5,
478 |             linecolor="white",
479 |             cbar=True,
480 |             cbar_kws={"label": "信息条数", "orientation": "horizontal"},
481 |         )
482 |         # counter = 0  # 初始化计数器
483 |         # for y in range(data.shape[0]):
484 |         #     for x in range(data.shape[1]):
485 |         #         plt.text(
486 |         #             x + 0.5,
487 |         #             y + 0.5,
488 |         #             str(counter),
489 |         #             ha="center",
490 |         #             va="center",
491 |         #             color="black",
492 |         #         )
493 |         #         counter += 1
494 |         plt.yticks([])
495 |         plt.tight_layout()
496 |         filepath = "./用户数据/data/src/time/" + title + "的聊天时间分布热力图!!.png"
497 |         plt.savefig(filepath, format="png")
498 | 
499 |     # 情绪分析
500 | 
501 |     def draw_emo(self, df, mode):
502 |         """
503 |         绘制饼图
504 |         :param df: 数据
505 |         :param mode: 模式
506 |         :return: 无
507 |         """
508 |         df["percentage"] = df["counts"] / df["counts"].sum() * 100
509 |         colors = {0: "#CFE3C8", 1: "#FFD686", 2: "#E59069"}
510 |         fig, ax = plt.subplots(figsize=(10, 10))
511 |         wedges, texts, autotexts = ax.pie(
512 |             df["percentage"],
513 |             startangle=140,
514 |             autopct="%1.1f%%",
515 |             colors=[colors[rank] for rank in df["rank"]],
516 |         )
517 | 
518 |         legend_labels = ["负面", "中立", "正面"]
519 |         ax.legend(
520 |             wedges,
521 |             legend_labels,
522 |             title="情绪的颜色对应",
523 |             loc="center left",
524 |             bbox_to_anchor=(0, 0),
525 |         )
526 | 
527 |         plt.title(mode + "情绪占比")
528 |         plt.axis("equal")
529 |         filepath = "./用户数据/data/src/emo/" + mode + "的情绪分析图!!.png"
530 |         plt.savefig(filepath, format="png")
531 | 


--------------------------------------------------------------------------------
/getMsg.py:
--------------------------------------------------------------------------------
 1 | # 最后编辑：
 2 | # 晋晨曦 2024.1.20 20:28
 3 | # qq：2950171570
 4 | # email：Jin0714@outlook.com  回复随缘
 5 | from pandas import read_csv
 6 | 
 7 | 
 8 | def read_msg(path):
 9 |     """
10 |     读取数据
11 |     :param path: 数据位置
12 |     :return: df
13 |     """
14 |     lemon = read_csv(path)
15 |     return lemon
16 | 


--------------------------------------------------------------------------------
/icon/icon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/icon/icon.ico


--------------------------------------------------------------------------------
/icon/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/icon/icon.png


--------------------------------------------------------------------------------
/logo/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/logo/logo.png


--------------------------------------------------------------------------------
/logo/logo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/logo/logo2.png


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | # 最后编辑：
 2 | # 晋晨曦 2024.2.2 17.13
 3 | # qq：2950171570
 4 | # email：Jin0714@outlook.com  回复随缘
 5 | from matplotlib import rcParams
 6 | from os import makedirs
 7 | from show_gui import ShowGui
 8 | from console import no_con
 9 | 
10 | 
def main():
    """
    Program entry point.

    Creates the output directory tree, selects the SimHei font for matplotlib,
    builds the GUI, calls ``no_con()`` and finally shows the window.

    :return: None
    """
    # Output folders, created in this order (existing ones are left alone).
    folders = (
        "用户数据/api",
        "用户数据/data",
        "用户数据/data/bqb",
        "用户数据/data/emoji",
        "用户数据/data/src",
        "用户数据/data/word",
        "用户数据/data/src/emo",
        "用户数据/data/src/emoji",
        "用户数据/data/src/time",
        "用户数据/data/src/word",
        "用户数据/data/src/表情包",
        "用户数据/data/src/热力图",
    )
    for folder in folders:
        makedirs(folder, exist_ok=True)

    rcParams["font.family"] = "SimHei"

    gui = ShowGui()
    no_con()
    gui.show()
38 | 
39 | 
# Run the application only when this file is executed as a script.
if __name__ == "__main__":
    main()
42 | 


--------------------------------------------------------------------------------
/save.py:
--------------------------------------------------------------------------------
 1 | # 最后编辑：
 2 | # 晋晨曦 2024.1.20 20:28
 3 | # qq：2950171570
 4 | # email：Jin0714@outlook.com  回复随缘
 5 | class save_data:
 6 |     def __init__(self):
 7 |         """
 8 |         构造函数，初始化一些可能需要的属性。
 9 |         """
10 |         pass
11 | 
12 |     def __str__(self):
13 |         """
14 |         字符串表示，用于打印对象时提供有用的信息。
15 |         """
16 |         return "draw类实例，用于保存数据"
17 |         pass
18 | 
19 |     def save_data_all(self, data, path):
20 |         """
21 |         保存所有数据
22 |         :param data: 数据
23 |         :param path: 路径
24 |         :return:
25 |         """
26 |         for d, p in zip(data, path):
27 |             d.to_excel(p, index=False)
28 |         pass
29 | 


--------------------------------------------------------------------------------
/show_gui.py:
--------------------------------------------------------------------------------
  1 | import tkinter as tk
  2 | from tkinter import filedialog
  3 | from tkinter import messagebox
  4 | from tkinter import PhotoImage
  5 | from ctypes import windll
  6 | from functools import partial
  7 | import getMsg as r
  8 | import solve
  9 | import data_process as dp
 10 | from webbrowser import open_new_tab
 11 | 
 12 | 
 13 | class ShowGui:
 14 |     def __init__(self):
 15 |         try:
 16 |             windll.shcore.SetProcessDpiAwareness(1)
 17 |         except (AttributeError, ValueError):
 18 |             pass
 19 |         self.root = tk.Tk()
 20 |         self.root.title("橙子作品之聊天记录分析")
 21 |         self.root.geometry("1600x1200")
 22 |         self.root.iconbitmap('./icon/icon.ico')
 23 |         self.shape1 = "fas fa-dog"
 24 |         self.shape2 = "far fa-lemon"
 25 |         self.shape3 = "fas fa-paw"
 26 |         self.init_pages_start()
 27 |         pass
 28 | 
 29 |     def show(self):
 30 |         self.page_start.pack(fill="both", expand=True)
 31 |         self.root.mainloop()
 32 | 
 33 |     def init_pages_start(self):
 34 |         self.page_start = tk.Frame(self.root)
 35 |         self.center_frame_start = tk.Frame(self.page_start)
 36 |         self.center_frame_start.place(relx=0.5, rely=0.3, anchor=tk.CENTER)
 37 | 
 38 |         self.logo = PhotoImage(file="./logo/logo.png")
 39 |         self.logo2 = PhotoImage(file="./logo/logo2.png")
 40 |         tk.Label(self.center_frame_start, image=self.logo).pack()
 41 |         tk.Label(self.root, image=self.logo2).pack(side="top", anchor="nw")
 42 | 
 43 |         tk.Button(
 44 |             self.center_frame_start,
 45 |             text="启动程序",
 46 |             command=self.show_page_choice_path,
 47 |             font=("SimHei", 16),
 48 |         ).pack()
 49 | 
 50 |     def init_pages_choice_path(self):
 51 |         self.page_choice_path = tk.Frame(self.root)
 52 |         self.center_frame_choice_path = tk.Frame(self.page_choice_path)
 53 |         self.center_frame_choice_path.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
 54 |         tk.Button(
 55 |             self.center_frame_choice_path,
 56 |             text="选择聊天记录",
 57 |             command=self.load_date,
 58 |             font=("SimHei", 16),
 59 |         ).pack()
 60 | 
 61 |     def init_pages_load_name(self):
 62 |         self.page_load_name = tk.Frame(self.root)
 63 |         self.center_frame_load_name = tk.Frame(self.page_load_name)
 64 |         self.center_frame_load_name.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
 65 |         tk.Label(
 66 |             self.center_frame_load_name,
 67 |             text="请输入两个分析对象的名字",
 68 |             font=("SimHei", 16),
 69 |         ).pack(pady=(0, 20))
 70 |         tk.Label(
 71 |             self.center_frame_load_name,
 72 |             text="分析对象1的名字(你的名字，如：橙子先生)：",
 73 |             font=("SimHei", 16),
 74 |         ).pack()
 75 |         name_entry_one = tk.Entry(self.center_frame_load_name)
 76 |         name_entry_one.pack(pady=5)
 77 |         tk.Label(
 78 |             self.center_frame_load_name,
 79 |             text="分析对象2的名字(对方的名字，如：柠檬女士)：",
 80 |             font=("SimHei", 16),
 81 |         ).pack()
 82 |         name_entry_two = tk.Entry(self.center_frame_load_name)
 83 |         name_entry_two.pack(pady=5)
 84 |         tk.Button(
 85 |             self.center_frame_load_name,
 86 |             text="确认",
 87 |             command=partial(self.save_name, name_entry_one, name_entry_two),
 88 |             font=("SimHei", 16),
 89 |         ).pack(pady=10)
 90 | 
 91 |     def initpages_process_data(self):
 92 |         self.page_process_data = tk.Frame(self.root)
 93 |         self.center_frame_process_data = tk.Frame(self.page_process_data)
 94 |         self.center_frame_process_data.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
 95 |         tk.Label(
 96 |             self.center_frame_process_data,
 97 |             text=f"将分析从 {self.s_date} 到 {self.e_date} 的聊天记录",
 98 |             font=("SimHei", 16),
 99 |         ).pack(pady=(0, 20))
100 |         tk.Button(
101 |             self.center_frame_process_data,
102 |             text="开始分析",
103 |             command=self.Go,
104 |             font=("SimHei", 16),
105 |         ).pack()
106 | 
107 |     def initpages_process_heat(self):
108 |         self.s.get_max_count_date()
109 |         self.page_process_heat = tk.Frame(self.root)
110 |         self.center_frame_process_heat = tk.Frame(self.page_process_heat)
111 |         self.center_frame_process_heat.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
112 |         tk.Label(
113 |             self.center_frame_process_heat,
114 |             text="聊天热度分析\n现在开始分析聊天热度\n请选择分析谁的聊天记录？",
115 |             font=("SimHei", 16),
116 |         ).pack(pady=(0, 20))
117 |         tk.Label(
118 |             self.center_frame_process_heat,
119 |             text="-----------------------",
120 |             font=("SimHei", 16),
121 |         ).pack(pady=(0, 10))
122 |         tk.Button(
123 |             self.center_frame_process_heat,
124 |             text=self.name1,
125 |             command=partial(self.process_heat, 1),
126 |             font=("SimHei", 16),
127 |         ).pack(pady=(0, 10))
128 |         tk.Label(
129 |             self.center_frame_process_heat,
130 |             text="-----------------------",
131 |             font=("SimHei", 16),
132 |         ).pack(pady=(0, 10))
133 |         tk.Button(
134 |             self.center_frame_process_heat,
135 |             text=self.name2,
136 |             command=partial(self.process_heat, 2),
137 |             font=("SimHei", 16),
138 |         ).pack(pady=(0, 10))
139 |         tk.Label(
140 |             self.center_frame_process_heat,
141 |             text="-----------------------",
142 |             font=("SimHei", 16),
143 |         ).pack(pady=(0, 10))
144 |         tk.Button(
145 |             self.center_frame_process_heat,
146 |             text=self.name1 + "和" + self.name2,
147 |             command=partial(self.process_heat, 3),
148 |             font=("SimHei", 16),
149 |         ).pack(pady=(0, 10))
150 |         tk.Label(
151 |             self.center_frame_process_heat,
152 |             text="-----------------------",
153 |             font=("SimHei", 16),
154 |         ).pack(pady=(0, 10))
155 |         tk.Button(
156 |             self.center_frame_process_heat,
157 |             text="我全都要！（推荐）",
158 |             command=partial(self.process_heat, 4),
159 |             font=("SimHei", 16),
160 |         ).pack()
161 |         tk.Label(
162 |             self.center_frame_process_heat,
163 |             text="-----------------------",
164 |             font=("SimHei", 16),
165 |         ).pack(pady=(0, 10))
166 |         tk.Button(
167 |             self.center_frame_process_heat,
168 |             text="跳过",
169 |             command=partial(self.process_heat, 5),
170 |             font=("SimHei", 16),
171 |         ).pack()
172 | 
173 |     def initpages_process_time(self):
174 |         self.s.get_max_count_time()
175 |         self.page_process_time = tk.Frame(self.root)
176 |         self.center_frame_process_time = tk.Frame(self.page_process_time)
177 |         self.center_frame_process_time.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
178 |         tk.Label(
179 |             self.center_frame_process_time,
180 |             text="聊天时间分析\n现在开始分析聊天时间分布\n请选择分析谁的聊天记录？",
181 |             font=("SimHei", 16),
182 |         ).pack(pady=(0, 20))
183 |         tk.Label(
184 |             self.center_frame_process_time,
185 |             text="-----------------------",
186 |             font=("SimHei", 16),
187 |         ).pack(pady=(0, 10))
188 |         tk.Button(
189 |             self.center_frame_process_time,
190 |             text=self.name1,
191 |             command=partial(self.process_time, 1),
192 |             font=("SimHei", 16),
193 |         ).pack(pady=(0, 10))
194 |         tk.Label(
195 |             self.center_frame_process_time,
196 |             text="-----------------------",
197 |             font=("SimHei", 16),
198 |         ).pack(pady=(0, 10))
199 |         tk.Button(
200 |             self.center_frame_process_time,
201 |             text=self.name2,
202 |             command=partial(self.process_time, 2),
203 |             font=("SimHei", 16),
204 |         ).pack(pady=(0, 10))
205 |         tk.Label(
206 |             self.center_frame_process_time,
207 |             text="-----------------------",
208 |             font=("SimHei", 16),
209 |         ).pack(pady=(0, 10))
210 |         tk.Button(
211 |             self.center_frame_process_time,
212 |             text=self.name1 + "和" + self.name2,
213 |             command=partial(self.process_time, 3),
214 |             font=("SimHei", 16),
215 |         ).pack(pady=(0, 10))
216 |         tk.Label(
217 |             self.center_frame_process_time,
218 |             text="-----------------------",
219 |             font=("SimHei", 16),
220 |         ).pack(pady=(0, 10))
221 |         tk.Button(
222 |             self.center_frame_process_time,
223 |             text="我全都要！（推荐）",
224 |             command=partial(self.process_time, 4),
225 |             font=("SimHei", 16),
226 |         ).pack()
227 |         tk.Label(
228 |             self.center_frame_process_time,
229 |             text="-----------------------",
230 |             font=("SimHei", 16),
231 |         ).pack(pady=(0, 10))
232 |         tk.Button(
233 |             self.center_frame_process_time,
234 |             text="跳过",
235 |             command=partial(self.process_time, 5),
236 |             font=("SimHei", 16),
237 |         ).pack()
238 | 
239 |     def initpages_process_biaoqingbao(self):
240 |         self.page_process_bqb = tk.Frame(self.root)
241 |         self.center_frame_process_bqb = tk.Frame(self.page_process_bqb)
242 |         self.center_frame_process_bqb.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
243 |         tk.Label(
244 |             self.center_frame_process_bqb,
245 |             text="现在开始分析表情包",
246 |             font=("SimHei", 16),
247 |         ).pack(pady=(0, 10))
248 |         tk.Button(
249 |             self.center_frame_process_bqb,
250 |             text="开始分析表情包",
251 |             command=partial(self.process_bqb, 1),
252 |             font=("SimHei", 16),
253 |         ).pack()
254 |         tk.Label(
255 |             self.center_frame_process_bqb,
256 |             text="-----------------------",
257 |             font=("SimHei", 16),
258 |         ).pack(pady=(0, 10))
259 |         tk.Button(
260 |             self.center_frame_process_bqb,
261 |             text="跳过",
262 |             command=partial(self.process_bqb, 0),
263 |             font=("SimHei", 16),
264 |         ).pack()
265 | 
266 |     def initpages_process_emoji(self):
267 |         self.page_process_emoji = tk.Frame(self.root)
268 |         self.center_frame_process_emoji = tk.Frame(self.page_process_emoji)
269 |         self.center_frame_process_emoji.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
270 |         tk.Label(
271 |             self.center_frame_process_emoji,
272 |             text="现在开始分析emoji",
273 |             font=("SimHei", 16),
274 |         ).pack(pady=(0, 10))
275 |         tk.Button(
276 |             self.center_frame_process_emoji,
277 |             text="开始分析emoji",
278 |             command=partial(self.process_emoji, 1),
279 |             font=("SimHei", 16),
280 |         ).pack()
281 |         tk.Label(
282 |             self.center_frame_process_emoji,
283 |             text="-----------------------",
284 |             font=("SimHei", 16),
285 |         ).pack(pady=(0, 10))
286 |         tk.Button(
287 |             self.center_frame_process_emoji,
288 |             text="跳过",
289 |             command=partial(self.process_emoji, 0),
290 |             font=("SimHei", 16),
291 |         ).pack()
292 | 
293 |     def initpages_process_word_all(self):
294 |         self.page_process_word_all = tk.Frame(self.root)
295 |         self.center_frame_process_word_all = tk.Frame(self.page_process_word_all)
296 |         self.center_frame_process_word_all.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
297 |         tk.Label(
298 |             self.center_frame_process_word_all,
299 |             text="现在开始分析词频",
300 |             font=("SimHei", 16),
301 |         ).pack(pady=(0, 10))
302 |         tk.Button(
303 |             self.center_frame_process_word_all,
304 |             text="自定义词云形状",
305 |             command=partial(self.process_word_all, 1),
306 |             font=("SimHei", 16),
307 |         ).pack()
308 |         tk.Label(
309 |             self.center_frame_process_word_all,
310 |             text="-----------------------",
311 |             font=("SimHei", 16),
312 |         ).pack(pady=(0, 10))
313 |         tk.Button(
314 |             self.center_frame_process_word_all,
315 |             text="使用默认词云形状",
316 |             command=partial(self.process_word_all, 2),
317 |             font=("SimHei", 16),
318 |         ).pack()
319 |         tk.Label(
320 |             self.center_frame_process_word_all,
321 |             text="-----------------------",
322 |             font=("SimHei", 16),
323 |         ).pack(pady=(0, 10))
324 |         tk.Button(
325 |             self.center_frame_process_word_all,
326 |             text="跳过",
327 |             command=partial(self.process_word_all, 0),
328 |             font=("SimHei", 16),
329 |         ).pack()
330 | 
331 |     def initpages_process_word_1(self):
332 |         self.page_process_word_1 = tk.Frame(self.root)
333 |         self.center_frame_process_word_1 = tk.Frame(self.page_process_word_1)
334 |         self.center_frame_process_word_1.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
335 |         tk.Label(
336 |             self.center_frame_process_word_1,
337 |             text="进入下面的网站，点击“查看图标”，找到喜欢的形状，点击右上角的复制，然后粘贴到下面的输入栏即可\n如果还是不明白，就去查看使用教程中的词云形状自定义教程",
338 |             font=("SimHei", 16),
339 |         ).pack(pady=(0, 10))
340 |         link_label = tk.Label(
341 |             self.center_frame_process_word_1,
342 |             text="点击打开网站，选取词云形状",
343 |             fg="blue",
344 |             cursor="hand2",
345 |             font=("SimHei", 16),
346 |         )
347 |         link_label.pack()
348 |         link_label.bind(
349 |             "<Button-1>", lambda e: self.open_link("https://fa5.dashgame.com/#/")
350 |         )
351 |         tk.Label(
352 |             self.center_frame_process_word_1,
353 |             text="请输入三个形状编码",
354 |             font=("SimHei", 16),
355 |         ).pack(pady=(0, 10))
356 |         tk.Label(
357 |             self.center_frame_process_word_1,
358 |             text=f"{self.name1}的词云形状：",
359 |             font=("SimHei", 16),
360 |         ).pack()
361 |         shape_entry1 = tk.Entry(self.center_frame_process_word_1)
362 |         shape_entry1.pack(pady=5)
363 |         tk.Label(
364 |             self.center_frame_process_word_1,
365 |             text=f"{self.name2}的词云形状：",
366 |             font=("SimHei", 16),
367 |         ).pack()
368 |         shape_entry2 = tk.Entry(self.center_frame_process_word_1)
369 |         shape_entry2.pack(pady=5)
370 |         tk.Label(
371 |             self.center_frame_process_word_1,
372 |             text="全部聊天记录的词云形状：",
373 |             font=("SimHei", 16),
374 |         ).pack()
375 |         shape_entry3 = tk.Entry(self.center_frame_process_word_1)
376 |         shape_entry3.pack(pady=5)
377 |         tk.Button(
378 |             self.center_frame_process_word_1,
379 |             text="确认",
380 |             command=partial(self.save_shape, shape_entry1, shape_entry2, shape_entry3),
381 |             font=("SimHei", 16),
382 |         ).pack(pady=10)
383 |         tk.Label(
384 |             self.center_frame_process_word_1,
385 |             text="请注意一定要输入正确的词云代码，例如：far fa-lemon，如果输入错误会导致程序出现未知错误",
386 |             font=("SimHei", 16),
387 |         ).pack()
388 | 
389 |     def initpages_process_word_2(self):
390 |         self.s.change_shape(self.shape1, self.shape2, self.shape3)
391 |         self.page_process_word_2 = tk.Frame(self.root)
392 |         self.center_frame_process_word_2 = tk.Frame(self.page_process_word_2)
393 |         self.center_frame_process_word_2.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
394 |         tk.Label(
395 |             self.center_frame_process_word_2,
396 |             text="词频分析\n现在开始分析词频并生成词云\n请选择分析谁的聊天记录？",
397 |             font=("SimHei", 16),
398 |         ).pack(pady=(0, 20))
399 |         tk.Label(
400 |             self.center_frame_process_word_2,
401 |             text="-----------------------",
402 |             font=("SimHei", 16),
403 |         ).pack(pady=(0, 10))
404 |         tk.Button(
405 |             self.center_frame_process_word_2,
406 |             text=self.name1,
407 |             command=partial(self.process_word, 1),
408 |             font=("SimHei", 16),
409 |         ).pack(pady=(0, 10))
410 |         tk.Label(
411 |             self.center_frame_process_word_2,
412 |             text="-----------------------",
413 |             font=("SimHei", 16),
414 |         ).pack(pady=(0, 10))
415 |         tk.Button(
416 |             self.center_frame_process_word_2,
417 |             text=self.name2,
418 |             command=partial(self.process_word, 2),
419 |             font=("SimHei", 16),
420 |         ).pack(pady=(0, 10))
421 |         tk.Label(
422 |             self.center_frame_process_word_2,
423 |             text="-----------------------",
424 |             font=("SimHei", 16),
425 |         ).pack(pady=(0, 10))
426 |         tk.Button(
427 |             self.center_frame_process_word_2,
428 |             text=self.name1 + "和" + self.name2,
429 |             command=partial(self.process_word, 3),
430 |             font=("SimHei", 16),
431 |         ).pack(pady=(0, 10))
432 |         tk.Label(
433 |             self.center_frame_process_word_2,
434 |             text="-----------------------",
435 |             font=("SimHei", 16),
436 |         ).pack(pady=(0, 10))
437 |         tk.Button(
438 |             self.center_frame_process_word_2,
439 |             text="我全都要！（推荐）",
440 |             command=partial(self.process_word, 4),
441 |             font=("SimHei", 16),
442 |         ).pack()
443 | 
444 |     def initpages_process_emo_all(self):
445 |         self.page_process_emo_all = tk.Frame(self.root)
446 |         self.center_frame_process_emo_all = tk.Frame(self.page_process_emo_all)
447 |         self.center_frame_process_emo_all.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
448 |         tk.Label(
449 |             self.center_frame_process_emo_all,
450 |             text="你是否想分析每一句聊天记录的情感倾向来判断谁在聊天中带来了更多的正能量，谁又经常在聊天中诉苦？！\n请注意，此部分需要您自行获得百度智能云中自然语言分析中的情感倾向分析API，这部分请自行百度。\n新用户有50万次免费使用，但是QPS只有2，也就是一秒只能分析两句话，速度会很慢。\n请注意本部分需要联网",
451 |             font=("SimHei", 16),
452 |         ).pack(pady=(0, 10))
453 |         tk.Button(
454 |             self.center_frame_process_emo_all,
455 |             text="想！",
456 |             command=partial(self.process_emo, 1),
457 |             font=("SimHei", 16),
458 |         ).pack()
459 |         tk.Label(
460 |             self.center_frame_process_emo_all,
461 |             text="-----------------------",
462 |             font=("SimHei", 16),
463 |         ).pack(pady=(0, 10))
464 |         tk.Button(
465 |             self.center_frame_process_emo_all,
466 |             text="不想",
467 |             command=partial(self.process_emo, 0),
468 |             font=("SimHei", 16),
469 |         ).pack()
470 |         tk.Label(
471 |             self.center_frame_process_emo_all,
472 |             text="请注意：如果你要进行情感分析，请一定确保自己的api次数足够，或者自己的账户余额足够，否则当次数用完会前功尽弃",
473 |             font=("SimHei", 16),
474 |         ).pack(pady=(0, 10))
475 | 
476 |     def initpages_process_emo_1(self):
477 |         self.page_process_emo_1 = tk.Frame(self.root)
478 |         self.center_frame_process_emo_1 = tk.Frame(self.page_process_emo_1)
479 |         self.center_frame_process_emo_1.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
480 |         tk.Label(
481 |             self.center_frame_process_emo_1,
482 |             text="请输入您API的QPS",
483 |             font=("SimHei", 16),
484 |         ).pack(pady=(0, 10))
485 |         qps_entry = tk.Entry(self.center_frame_process_emo_1)
486 |         qps_entry.pack(pady=5)
487 |         tk.Label(
488 |             self.center_frame_process_emo_1,
489 |             text="请输入App_ID",
490 |             font=("SimHei", 16),
491 |         ).pack(pady=(0, 10))
492 |         api_entry1 = tk.Entry(self.center_frame_process_emo_1)
493 |         api_entry1.pack(pady=5)
494 |         tk.Label(
495 |             self.center_frame_process_emo_1,
496 |             text="请输入API_KEY",
497 |             font=("SimHei", 16),
498 |         ).pack()
499 |         api_entry2 = tk.Entry(self.center_frame_process_emo_1)
500 |         api_entry2.pack(pady=5)
501 |         tk.Label(
502 |             self.center_frame_process_emo_1,
503 |             text="请输入SECRET_KEY",
504 |             font=("SimHei", 16),
505 |         ).pack()
506 |         api_entry3 = tk.Entry(self.center_frame_process_emo_1)
507 |         api_entry3.pack(pady=5)
508 |         tk.Button(
509 |             self.center_frame_process_emo_1,
510 |             text="确定",
511 |             command=partial(
512 |                 self.save_api, qps_entry, api_entry1, api_entry2, api_entry3
513 |             ),
514 |             font=("SimHei", 16),
515 |         ).pack(pady=10)
516 |         tk.Label(
517 |             self.center_frame_process_emo_1,
518 |             text="请注意一定要输入正确的api，如果输入错误会导致程序出现未知错误",
519 |             font=("SimHei", 16),
520 |         ).pack()
521 | 
522 |     def initpages_process_emo_2(self):
523 |         self.page_process_emo_2 = tk.Frame(self.root)
524 |         self.center_frame_process_emo_2 = tk.Frame(self.page_process_emo_2)
525 |         self.center_frame_process_emo_2.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
526 |         tk.Label(
527 |             self.center_frame_process_emo_2,
528 |             text="情感分析\n现在开始分析情感\n请选择分析谁的聊天记录？",
529 |             font=("SimHei", 16),
530 |         ).pack(pady=(0, 20))
531 |         tk.Label(
532 |             self.center_frame_process_emo_2,
533 |             text="-----------------------",
534 |             font=("SimHei", 16),
535 |         ).pack(pady=(0, 10))
536 |         tk.Button(
537 |             self.center_frame_process_emo_2,
538 |             text=self.name1,
539 |             command=partial(self.process_emo_do, 1),
540 |             font=("SimHei", 16),
541 |         ).pack(pady=(0, 10))
542 |         tk.Label(
543 |             self.center_frame_process_emo_2,
544 |             text="-----------------------",
545 |             font=("SimHei", 16),
546 |         ).pack(pady=(0, 10))
547 |         tk.Button(
548 |             self.center_frame_process_emo_2,
549 |             text=self.name2,
550 |             command=partial(self.process_emo_do, 2),
551 |             font=("SimHei", 16),
552 |         ).pack(pady=(0, 10))
553 |         tk.Label(
554 |             self.center_frame_process_emo_2,
555 |             text="-----------------------",
556 |             font=("SimHei", 16),
557 |         ).pack(pady=(0, 10))
558 |         tk.Button(
559 |             self.center_frame_process_emo_2,
560 |             text=self.name1 + "和" + self.name2,
561 |             command=partial(self.process_emo_do, 3),
562 |             font=("SimHei", 16),
563 |         ).pack(pady=(0, 10))
564 |         tk.Label(
565 |             self.center_frame_process_emo_2,
566 |             text="-----------------------",
567 |             font=("SimHei", 16),
568 |         ).pack(pady=(0, 10))
569 |         tk.Button(
570 |             self.center_frame_process_emo_2,
571 |             text="我全都要！（推荐）",
572 |             command=partial(self.process_emo_do, 4),
573 |             font=("SimHei", 16),
574 |         ).pack()
575 | 
576 |     def initpages_process_end(self):
577 |         self.page_process_end = tk.Frame(self.root)
578 |         self.center_frame_process_end = tk.Frame(self.page_process_end)
579 |         self.center_frame_process_end.place(relx=0.5, rely=0.4, anchor=tk.CENTER)
580 |         tk.Label(
581 |             self.center_frame_process_end,
582 |             text="恭喜！分析完成！可以退出啦！\n本软件由橙子先生一人独立开发，免费分享给大家使用！\n如果大家想的话，可以给橙子先生或者柠檬女士点个关注，这会让我们有成就感\n并且我预计会在未来不定期分享一些自己写的好玩的程序，谢谢大家。",
583 |             font=("SimHei", 16),
584 |         ).pack()
585 |         link_label_1 = tk.Label(
586 |             self.center_frame_process_end,
587 |             text="点击打开橙子先生小红书主页",
588 |             fg="blue",
589 |             cursor="hand2",
590 |             font=("SimHei", 16),
591 |         )
592 |         link_label_1.pack()
593 |         link_label_1.bind(
594 |             "<Button-1>",
595 |             lambda e: self.open_link(
596 |                 "https://www.xiaohongshu.com/user/profile/60f2fd48000000000100aaff"
597 |             ),
598 |         )
599 |         link_label_2 = tk.Label(
600 |             self.center_frame_process_end,
601 |             text="点击打开橙子先生bilibili主页",
602 |             fg="blue",
603 |             cursor="hand2",
604 |             font=("SimHei", 16),
605 |         )
606 |         link_label_2.pack()
607 |         link_label_2.bind(
608 |             "<Button-1>",
609 |             lambda e: self.open_link(
610 |                 "https://space.bilibili.com/316695110?spm_id_from=333.999.0.0"
611 |             ),
612 |         )
613 |         link_label_3 = tk.Label(
614 |             self.center_frame_process_end,
615 |             text="点击打开柠檬女士小红书主页",
616 |             fg="blue",
617 |             cursor="hand2",
618 |             font=("SimHei", 16),
619 |         )
620 |         link_label_3.pack()
621 |         link_label_3.bind(
622 |             "<Button-1>",
623 |             lambda e: self.open_link(
624 |                 "https://www.xiaohongshu.com/user/profile/5a79c3c64eacab6e6f5400c1?channelType=web_engagement_notification_page&channelTabId=mentions"
625 |             ),
626 |         )
627 |         tk.Label(
628 |             self.center_frame_process_end,
629 |             text="PS：结果都保存在 用户数据/data 文件夹中，无论是导出的表格还是图片都在里面\n图片在 用户数据/data/src 文件夹里\n祝大家生活愉快!",
630 |             font=("SimHei", 14),
631 |         ).pack()
632 | 
633 |     def show_page_choice_path(self):
634 |         self.init_pages_choice_path()
635 |         self.page_start.pack_forget()
636 |         self.page_choice_path.pack(fill="both", expand=True)
637 | 
638 |     def show_page_load_name(self):
639 |         self.init_pages_load_name()
640 |         self.page_choice_path.pack_forget()
641 |         self.page_load_name.pack(fill="both", expand=True)
642 | 
643 |     def show_page_process_data(self):
644 |         self.initpages_process_data()
645 |         self.page_load_name.pack_forget()
646 |         self.page_process_data.pack(fill="both", expand=True)
647 | 
648 |     def show_page_process_heat(self):
649 |         self.initpages_process_heat()
650 |         self.page_process_data.pack_forget()
651 |         self.page_process_heat.pack(fill="both", expand=True)
652 | 
653 |     def show_page_process_time(self):
654 |         self.initpages_process_time()
655 |         self.page_process_heat.pack_forget()
656 |         self.page_process_time.pack(fill="both", expand=True)
657 | 
658 |     def show_page_process_bqb(self):
659 |         self.initpages_process_biaoqingbao()
660 |         self.page_process_time.pack_forget()
661 |         self.page_process_bqb.pack(fill="both", expand=True)
662 | 
663 |     def show_page_process_emoji(self):
664 |         self.initpages_process_emoji()
665 |         self.page_process_bqb.pack_forget()
666 |         self.page_process_emoji.pack(fill="both", expand=True)
667 | 
668 |     def show_page_process_word_all(self):
669 |         self.initpages_process_word_all()
670 |         self.page_process_emoji.pack_forget()
671 |         self.page_process_word_all.pack(fill="both", expand=True)
672 | 
673 |     def show_page_process_word_1(self):
674 |         self.initpages_process_word_1()
675 |         self.page_process_word_all.pack_forget()
676 |         self.page_process_word_1.pack(fill="both", expand=True)
677 | 
678 |     def show_page_process_word_2_from1(self):
679 |         self.initpages_process_word_2()
680 |         self.page_process_word_1.pack_forget()
681 |         self.page_process_word_2.pack(fill="both", expand=True)
682 | 
683 |     def show_page_process_word_2_fromall(self):
684 |         self.initpages_process_word_2()
685 |         self.page_process_word_all.pack_forget()
686 |         self.page_process_word_2.pack(fill="both", expand=True)
687 | 
688 |     def show_page_process_emo_all(self):
689 |         self.initpages_process_emo_all()
690 |         self.page_process_word_2.pack_forget()
691 |         self.page_process_emo_all.pack(fill="both", expand=True)
692 | 
693 |     def show_page_process_emo_all_from_all(self):
694 |         self.initpages_process_emo_all()
695 |         self.page_process_word_all.pack_forget()
696 |         self.page_process_emo_all.pack(fill="both", expand=True)
697 | 
698 |     def show_page_process_emo_1(self):
699 |         self.initpages_process_emo_1()
700 |         self.page_process_emo_all.pack_forget()
701 |         self.page_process_emo_1.pack(fill="both", expand=True)
702 | 
703 |     def show_page_process_emo_2(self):
704 |         self.initpages_process_emo_2()
705 |         self.page_process_emo_1.pack_forget()
706 |         self.page_process_emo_2.pack(fill="both", expand=True)
707 | 
708 |     def show_page_end_emo_all(self):
709 |         self.initpages_process_end()
710 |         self.page_process_emo_all.pack_forget()
711 |         self.page_process_end.pack(fill="both", expand=True)
712 | 
713 |     def show_page_end_emo_2(self):
714 |         self.initpages_process_end()
715 |         self.page_process_emo_2.pack_forget()
716 |         self.page_process_end.pack(fill="both", expand=True)
717 | 
    def open_link(self, url):
        """Pass ``url`` to ``open_new_tab``; used as the click handler of the link labels."""
        open_new_tab(url)
720 | 
721 |     def save_name(self, name_entry1, name_entry2):
722 |         self.name1 = name_entry1.get()
723 |         self.name2 = name_entry2.get()
724 |         self.process_data()
725 |         self.show_page_process_data()
726 | 
727 |     def save_shape(self, shape_entry1, shape_entry2, shape_entry3):
728 |         self.shape1 = shape_entry1.get()
729 |         self.shape2 = shape_entry2.get()
730 |         self.shape3 = shape_entry3.get()
731 |         self.show_page_process_word_2_from1()
732 | 
733 |     def save_api(self, qps_entry, api_entry1, api_entry2, api_entry3):
734 |         self.api1 = api_entry1.get()
735 |         self.api2 = api_entry2.get()
736 |         self.api3 = api_entry3.get()
737 |         self.QPS = qps_entry.get()
738 |         self.s.get_api(self.QPS, self.api1, self.api2, self.api3)
739 |         self.show_page_process_emo_2()
740 | 
741 |     def load_date(self):
742 |         while True:
743 |             file_path = filedialog.askopenfilename(
744 |                 parent=self.page_choice_path, filetypes=[("CSV files", "*.csv")]
745 |             )
746 |             if file_path:
747 |                 if file_path.endswith(".csv"):
748 |                     self.df = r.read_msg(file_path)
749 |                     p_l = tk.Label(
750 |                         self.root,
751 |                         text=f"聊天记录文件为：{file_path}",
752 |                         bg="yellow",
753 |                         font=("SimHei", 12),
754 |                         width=500,
755 |                         height=1,
756 |                     )
757 |                     p_l.pack(side=tk.BOTTOM)
758 |                     break
759 |                 else:
760 |                     messagebox.showerror("错误", "请选择一个CSV文件", parent=self.root)  # 显示错误消息
761 |             else:
762 |                 break
763 |         self.show_page_load_name()
764 | 
765 |     def process_data(self):
766 |         self.j_df, self.n_df, self.all_df = dp.process_data(self.df)
767 |         self.s_date = self.all_df.iloc[0]["time"]
768 |         self.e_date = self.all_df.iloc[-1]["time"]
769 | 
770 |     def Go(self):
771 |         self.s = solve.solve(self.j_df, self.n_df, self.all_df, self.name1, self.name2)
772 |         self.show_page_process_heat()
773 | 
774 |     def process_heat(self, choice):
775 |         if choice == 1:
776 |             self.s.process_heat(self.name1)
777 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/热力图 中，请自行查看！")
778 |         elif choice == 2:
779 |             self.s.process_heat(self.name2)
780 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/热力图 中，请自行查看！")
781 |         elif choice == 3:
782 |             self.s.process_heat(self.name1 + self.name2)
783 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/热力图 中，请自行查看！")
784 |         elif choice == 4:
785 |             self.s.process_heat(self.name1)
786 |             self.s.process_heat(self.name2)
787 |             self.s.process_heat(self.name1 + self.name2)
788 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/热力图 中，请自行查看！")
789 |         self.show_page_process_time()
790 | 
791 | 
792 |     def process_time(self, choice):
793 |         if choice == 1:
794 |             self.s.process_time(self.name1)
795 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/time 中，请自行查看！")
796 |         elif choice == 2:
797 |             self.s.process_time(self.name2)
798 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/time 中，请自行查看！")
799 |         elif choice == 3:
800 |             self.s.process_time(self.name1 + self.name2)
801 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/time 中，请自行查看！")
802 |         elif choice == 4:
803 |             self.s.process_time(self.name1)
804 |             self.s.process_time(self.name2)
805 |             self.s.process_time(self.name1 + self.name2)
806 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/time 中，请自行查看！")
807 |         self.show_page_process_bqb()
808 | 
809 |     def process_emoji(self, choice):
810 |         if choice == 1:
811 |             self.s.process_emoji()
812 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/emoji 中，请自行查看！")
813 |         self.show_page_process_word_all()
814 | 
815 |     def process_bqb(self, choice):
816 |         if choice == 1:
817 |             self.s.process_biaoqingbao()
818 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/表情包 中，请自行查看！")
819 |         self.show_page_process_emoji()
820 | 
821 |     def process_word_all(self, choice):
822 |         if choice == 1:
823 |             self.show_page_process_word_1()
824 |         elif choice == 2:
825 |             self.show_page_process_word_2_fromall()
826 |         else:
827 |             self.show_page_process_emo_all_from_all()
828 | 
829 |     def process_word(self, choice):
830 |         if choice == 1:
831 |             self.s.process_words(self.name1)
832 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/word 中，请自行查看！")
833 |         elif choice == 2:
834 |             self.s.process_words(self.name2)
835 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/word 中，请自行查看！")
836 |         elif choice == 3:
837 |             self.s.process_words(self.name1 + self.name2)
838 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/word 中，请自行查看！")
839 |         elif choice == 4:
840 |             self.s.process_words(self.name1)
841 |             self.s.process_words(self.name2)
842 |             self.s.process_words(self.name1 + self.name2)
843 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/word 中，请自行查看！")
844 |         self.show_page_process_emo_all()
845 | 
846 |     def process_emo(self, choice):
847 |         if choice == 1:
848 |             self.show_page_process_emo_1()
849 |         else:
850 |             self.show_page_end_emo_all()
851 | 
852 |     def process_emo_do(self, choice):
853 |         if choice == 1:
854 |             self.s.process_emo(self.name1)
855 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/emo 中，请自行查看！")
856 |         elif choice == 2:
857 |             self.s.process_emo(self.name2)
858 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/emo 中，请自行查看！")
859 |         elif choice == 3:
860 |             self.s.process_emo(self.name1 + self.name2)
861 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/emo 中，请自行查看！")
862 |         elif choice == 4:
863 |             self.s.process_emo(self.name1)
864 |             self.s.process_emo(self.name2)
865 |             self.s.process_emo(self.name1 + self.name2)
866 |             messagebox.showinfo("分析结果", "分析完毕，结果保存在 ./用户数据/data/src/emo 中，请自行查看！")
867 |         self.show_page_end_emo_2()
868 | 


--------------------------------------------------------------------------------
/solve.py:
--------------------------------------------------------------------------------
  1 | # 最后编辑：
  2 | # 晋晨曦 2024.2.2 17.13
  3 | # qq：2950171570
  4 | # email：Jin0714@outlook.com  回复随缘
  5 | from jieba import cut
  6 | from re import findall
  7 | from re import sub
  8 | from re import search
  9 | from collections import defaultdict
 10 | import pandas as pd
 11 | from aip import AipNlp
 12 | from time import sleep
 13 | 
 14 | import draw
 15 | import save
 16 | import calenda as ca
 17 | 
 18 | 
 19 | class solve:
 20 |     def __init__(self, j_df, n_df, all_df, name1, name2):
 21 |         """
 22 |         构造函数
 23 |         :param j_df: name1聊天记录
 24 |         :param n_df: name2聊天记录
 25 |         :param all_df: 全部聊天记录
 26 |         :param name1: 主分析人
 27 |         :param name2: 聊天对象
 28 |         """
 29 |         self.emoji_j = defaultdict(int)
 30 |         self.emoji_l = defaultdict(int)
 31 |         self.d = draw.draw_data(name1, name2)
 32 |         self.sa = save.save_data()
 33 |         self.j_df = j_df
 34 |         self.n_df = n_df
 35 |         self.all_df = all_df
 36 |         self.j_df_clean = pd.DataFrame()
 37 |         self.n_df_clean = pd.DataFrame()
 38 |         self.all_df_clean = pd.DataFrame()
 39 |         self.words = pd.DataFrame()
 40 |         self.client = AipNlp("None", "None", "None")
 41 |         self.clean_data()
 42 |         self.name1 = name1
 43 |         self.name2 = name2
 44 |         self.shape1 = "fas fa-dog"
 45 |         self.shape2 = "fas fa-dog"
 46 |         self.shape3 = "fas fa-paw"
 47 |         self.s_year, self.s_month, self.e_year, self.e_month = self.find_time(
 48 |             self.all_df
 49 |         )
 50 |         self.months = self.find_month()
 51 |         self.months_size = len(self.months)
 52 |         pass
 53 | 
 54 |     def __str__(self):
 55 |         """
 56 |         字符串表示，用于打印对象时提供有用的信息。
 57 |         """
 58 |         return "solve类实例，用于分析数据"
 59 |         pass
 60 | 
 61 |     def not_start_with_msg_words(self, value):
 62 |         """
 63 |         判断是否不以<开头
 64 |         :param value: 文本
 65 |         :return: 不以<开头为true，否则flase
 66 |         """
 67 |         return not value.startswith("<")
 68 | 
 69 |     def remove_bracketed_text_and_count_words(self, s):
 70 |         """
 71 |         删除所有聊天记录[]中文字
 72 |         :param s:语句
 73 |         :return:删除后的语句
 74 |         """
 75 |         return sub(r"\[.*?\]", "", s)
 76 | 
 77 |     def clean_data(self):
 78 |         """
 79 |         清洗数据
 80 |         :return:无
 81 |         """
 82 |         self.j_df_clean = self.j_df[
 83 |             self.j_df["data"].apply(self.not_start_with_msg_words)
 84 |         ].copy()
 85 |         self.j_df_clean.loc[:, "data"] = self.j_df_clean["data"].apply(
 86 |             self.remove_bracketed_text_and_count_words
 87 |         )
 88 |         self.j_df_clean = self.j_df_clean[self.j_df_clean["data"].apply(len) > 0]
 89 | 
 90 |         self.n_df_clean = self.n_df[
 91 |             self.n_df["data"].apply(self.not_start_with_msg_words)
 92 |         ].copy()
 93 |         self.n_df_clean.loc[:, "data"] = self.n_df_clean["data"].apply(
 94 |             self.remove_bracketed_text_and_count_words
 95 |         )
 96 |         self.n_df_clean = self.n_df_clean[self.n_df_clean["data"].apply(len) > 0]
 97 | 
 98 |         self.all_df_clean = self.all_df.copy()
 99 | 
100 |     def find_time(self, df):
101 |         """
102 |         找出时间范围的年月
103 |         :param df: 数据
104 |         :return: 年月
105 |         """
106 |         earliest_time = df.iloc[0]["time"]
107 |         latest_time = df.iloc[-1]["time"]
108 |         earliest_year, earliest_month, _ = earliest_time.split("-")
109 |         latest_year, latest_month, _ = latest_time.split("-")
110 |         return (
111 |             int(earliest_year),
112 |             int(earliest_month),
113 |             int(latest_year),
114 |             int(latest_month),
115 |         )
116 | 
117 |     def find_month(self):
118 |         """
119 |         找到分析的月份列表
120 |         :return: 月份列表
121 |         """
122 |         months = []
123 | 
124 |         # 当前的年份和月份
125 |         current_year, current_month = self.s_year, self.s_month
126 | 
127 |         # 循环直到当前年月等于结束年月
128 |         while (current_year, current_month) <= (self.e_year, self.e_month):
129 |             # 将当前年月添加到列表中
130 |             months.append((current_year, current_month))
131 | 
132 |             # 如果当前月份是12月，则进入下一年的1月
133 |             if current_month == 12:
134 |                 current_year += 1
135 |                 current_month = 1
136 |             else:
137 |                 # 否则，月份增加1
138 |                 current_month += 1
139 | 
140 |         return months
141 | 
142 |     # 表情包分析
143 | 
144 |     def not_start_with_msg(self, value):
145 |         """
146 |         判断是否不以<开头
147 |         :param value: 文本
148 |         :return: 不以<开头为true，否则flase
149 |         """
150 |         return not value.startswith("<")
151 | 
152 |     def start_with_msg(self, value):
153 |         """
154 |         判断是否以<开头
155 |         :param value: 文本
156 |         :return: 不以<开头为true，否则flase
157 |         """
158 |         return value.startswith("<")
159 | 
160 |     def extract_androidmd5(self, text):
161 |         """
162 |         提取图片文件的androidmd5码用于分析图片种类
163 |         :param text:图片文件
164 |         :return:提取结果
165 |         """
166 |         match = search(r'androidmd5="([^"]*)"', text)
167 |         return match.group(1) if match else None
168 | 
169 |     def process_biaoqingbao(self):
170 |         """
171 |         分析表情包
172 |         :return: 处理后的数据和结果
173 |         """
174 |         # 分离表情包
175 |         j_df_bqb = self.j_df[self.j_df["data"].apply(self.start_with_msg)]
176 |         n_df_bqb = self.n_df[self.n_df["data"].apply(self.start_with_msg)]
177 |         self.j_df = self.j_df[self.j_df["data"].apply(self.not_start_with_msg)]
178 |         self.n_df = self.n_df[self.n_df["data"].apply(self.not_start_with_msg)]
179 | 
180 |         # 处理数据
181 |         j_df_bqb = j_df_bqb.copy()
182 |         j_df_bqb["data"] = j_df_bqb["data"].apply(self.extract_androidmd5)
183 | 
184 |         n_df_bqb = n_df_bqb.copy()
185 |         n_df_bqb["data"] = n_df_bqb["data"].apply(self.extract_androidmd5)
186 | 
187 |         # 统计表情包
188 |         value_counts = j_df_bqb["data"].value_counts()
189 |         j_df_bqb = value_counts.reset_index()
190 |         j_df_bqb.columns = ["data", "count"]
191 |         value_counts = n_df_bqb["data"].value_counts()
192 |         n_df_bqb = value_counts.reset_index()
193 |         n_df_bqb.columns = ["data", "count"]
194 | 
195 |         j_df_bqb = j_df_bqb.sort_values(by="count", ascending=False)
196 |         n_df_bqb = n_df_bqb.sort_values(by="count", ascending=False)
197 | 
198 |         save_data = [j_df_bqb, n_df_bqb]
199 |         save_path = [
200 |             "./用户数据/data/bqb/" + self.name1 + "_bqb.xlsx",
201 |             "./用户数据/data/bqb/" + self.name2 + "_bqb.xlsx",
202 |         ]
203 |         self.sa.save_data_all(save_data, save_path)
204 | 
205 |         max_count_j = max(j_df_bqb["count"])
206 |         max_count_n = max(n_df_bqb["count"])
207 |         max_count = max(max_count_j, max_count_n)
208 | 
209 |         self.d.draw_bqb(j_df_bqb, n_df_bqb, int(max_count + 5))
210 | 
211 |     # emoji分析
212 | 
213 |     def remove_bracketed_text_and_count_j(self, s):
214 |         """
215 |         删除所有name1聊天记录[]中文字，统计emoji
216 |         :param s:语句
217 |         :return:删除后的语句
218 |         """
219 |         # 使用正则表达式找到所有被 "[]" 包围的文字
220 |         bracketed_texts = findall(r"\[(.*?)\]", s)
221 | 
222 |         # 更新统计字典
223 |         for text in bracketed_texts:
224 |             self.emoji_j[text] += 1
225 | 
226 |         # 删除被 "[]" 包围的文字
227 |         return sub(r"\[.*?\]", "", s)
228 | 
229 |     def remove_bracketed_text_and_count_l(self, s):
230 |         """
231 |         删除所有name2聊天记录[]中文字，统计emoji
232 |         :param s:语句
233 |         :return:删除后的语句
234 |         """
235 |         # 使用正则表达式找到所有被 "[]" 包围的文字
236 |         bracketed_texts = findall(r"\[(.*?)\]", s)
237 | 
238 |         # 更新统计字典
239 |         for text in bracketed_texts:
240 |             self.emoji_l[text] += 1
241 | 
242 |         # 删除被 "[]" 包围的文字
243 |         return sub(r"\[.*?\]", "", s)
244 | 
245 |     def sort_dicts(self, dict1, dict2):
246 |         """
247 |         将量字典归为并集，并排序
248 |         :param dict1: 字典一
249 |         :param dict2: 字典二
250 |         :return:每个字典的全集并排序
251 |         """
252 |         # 合并两个字典的键并去重
253 |         all_keys = set(dict1.keys()) | set(dict2.keys())
254 |         # 对键进行排序
255 |         sorted_keys = sorted(all_keys)
256 |         # 创建两个新字典，按排序后的键存放键对
257 |         sorted_dict1 = {key: dict1.get(key, None) for key in sorted_keys}
258 |         sorted_dict2 = {key: dict2.get(key, None) for key in sorted_keys}
259 | 
260 |         return sorted_dict1, sorted_dict2
261 | 
262 |     def process_emoji(self):
263 |         """
264 |         统计两个人的emoji使用情况
265 |         :return: 返回处理好的聊天记录和得到的分析数据
266 |         """
267 |         # 统计和删除emoji
268 |         self.j_df.loc[:, "data"] = self.j_df["data"].apply(
269 |             self.remove_bracketed_text_and_count_j
270 |         )
271 |         self.n_df.loc[:, "data"] = self.n_df["data"].apply(
272 |             self.remove_bracketed_text_and_count_l
273 |         )
274 | 
275 |         # 清洗数据
276 |         self.j_df = self.j_df[self.j_df["data"].apply(len) > 0]
277 |         self.n_df = self.n_df[self.n_df["data"].apply(len) > 0]
278 | 
279 |         # 合并两个字典的键并去重
280 |         all_keys = set(self.emoji_j.keys()).union(set(self.emoji_l.keys()))
281 |         for key in all_keys:
282 |             self.emoji_j.setdefault(key, 0)
283 |             self.emoji_l.setdefault(key, 0)
284 | 
285 |         # 转化为df
286 |         emoji_df_j = pd.DataFrame(list(self.emoji_j.items()), columns=["data", "count"])
287 |         emoji_df_l = pd.DataFrame(list(self.emoji_l.items()), columns=["data", "count"])
288 | 
289 |         # 排序
290 |         emoji_df_j = emoji_df_j.sort_values(by="count", ascending=False)
291 |         emoji_df_l = emoji_df_l.sort_values(by="count", ascending=False)
292 | 
293 |         # 按照一个顺序排列
294 |         emoji_j_sorted, emoji_l_sorted = self.sort_dicts(self.emoji_j, self.emoji_l)
295 | 
296 |         # 保存
297 |         save_data = [emoji_df_j, emoji_df_l]
298 |         save_path = [
299 |             "./用户数据/data/emoji/" + self.name1 + "_emoji.xlsx",
300 |             "./用户数据/data/emoji/" + self.name2 + "_emoji.xlsx",
301 |         ]
302 |         self.sa.save_data_all(save_data, save_path)
303 | 
304 |         max_count = max(max(emoji_df_j["count"]), max(emoji_df_l["count"]))
305 | 
306 |         self.d.draw_emoji(emoji_j_sorted, emoji_l_sorted, int(max_count + 5))
307 | 
308 |     # 词语分析
309 | 
310 |     def change_shape(self, shape1, shape2, shape3):
311 |         self.shape1 = shape1
312 |         self.shape2 = shape2
313 |         self.shape3 = shape3
314 | 
315 |     def process_words(self, mode):
316 |         """
317 |         分析语句
318 |         :param mode: 分析模式
319 |         :return: 无
320 |         """
321 | 
322 |         if mode == self.name1 + self.name2:
323 |             data_words = self.all_df_clean["data"].copy()
324 |             shape = self.shape3
325 |             title = "两个人"
326 |             pass
327 |         elif mode == self.name2:
328 |             data_words = self.n_df_clean["data"].copy()
329 |             shape = self.shape2
330 |             title = mode
331 |             pass
332 |         elif mode == self.name1:
333 |             data_words = self.j_df_clean["data"].copy()
334 |             shape = self.shape1
335 |             title = mode
336 |             pass
337 |         else:
338 |             print("参数错误，退出")
339 |             return
340 |         ans = {}
341 |         for d in data_words:
342 |             words = cut(d, cut_all=False)
343 |             for w in words:
344 |                 if w in ans and len(w) > 1:
345 |                     ans[w] += 1
346 |                 elif len(w) > 1:
347 |                     ans[w] = 1
348 |         sorted_ans = sorted(ans.items(), key=lambda x: x[1], reverse=True)
349 |         ans.clear()
350 |         for data in sorted_ans:
351 |             ans[data[0]] = data[1]
352 |         self.words = pd.DataFrame(list(ans.items()), columns=["data", "counts"])
353 |         # self.words = self.words[self.words["data"].apply(lambda s: s != "主人")]
354 |         self.words.dropna(subset=["data"])
355 |         self.words = self.words[self.words["data"].apply(len) > 1]
356 |         sava_data = [self.words.copy()]
357 |         sava_path = ["./用户数据/data/word/" + title + "_words_counts.xlsx"]
358 |         self.sa.save_data_all(sava_data, sava_path)
359 |         self.d.draw_word_cloud(self.words.copy(), shape, title)
360 | 
361 |     # 分析热度
362 | 
363 |     def get_max_count_date(self):
364 |         date_df = self.all_df.copy()
365 |         date_df["time"] = pd.to_datetime(date_df["time"])
366 |         date_df["date"] = date_df["time"].dt.date
367 |         s_date, e_date = ca.get_month_dates(
368 |             self.s_year, self.s_month, self.e_year, self.e_month
369 |         )
370 |         date_range = pd.date_range(start=s_date, end=e_date)
371 |         date_counts = date_df.groupby("date").size().reindex(date_range, fill_value=0)
372 |         date_counts = date_counts.to_frame()
373 |         date_counts = date_counts.reset_index()
374 |         date_counts.columns = ["time", "counts"]
375 |         date_counts["week_day"] = date_counts["time"].apply(lambda s: s.weekday() + 1)
376 |         date_counts.sort_values(by="time")
377 |         date_counts["month"] = date_counts["time"].dt.month
378 |         date_counts["year"] = date_counts["time"].dt.year
379 |         date_dfs = [group for _, group in date_counts.groupby(["year", "month"])]
380 |         max_count = max(date_counts["counts"]) + 50
381 |         self.max_heat_date = max_count
382 | 
383 |     def make_rili_df(self, date_counts):
384 |         """
385 |         生成日历df
386 |         :param date_counts: 每天的count
387 |         :return: 结果
388 |         """
389 |         days = ["0", "星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
390 |         columns_date_df = [days[i] for i in range(1, 8)]
391 |         df_new = pd.DataFrame(columns=columns_date_df)
392 |         temp_row = [0] * 7
393 |         is_has = False
394 |         for _, row_old in date_counts.iterrows():
395 |             time = row_old["week_day"] - 1  # 将时间1-7转换为索引0-6
396 |             count = row_old["counts"]
397 |             temp_row[time] = count
398 |             is_has = True
399 |             if time == 6:  # 如果填充到了星期日，添加行到df_new
400 |                 df_new = pd.concat(
401 |                     [df_new, pd.DataFrame([temp_row], columns=columns_date_df)],
402 |                     ignore_index=True,
403 |                 )
404 |                 temp_row = [0] * 7
405 |                 is_has = False
406 |         # 处理最后一行
407 |         if is_has:  # 如果最后一行不是全0
408 |             df_new = pd.concat(
409 |                 [df_new, pd.DataFrame([temp_row], columns=columns_date_df)],
410 |                 ignore_index=True,
411 |             )
412 |         # 输出新的DataFrame
413 |         return df_new
414 | 
415 |     def make_masks(
416 |         self,
417 |     ):
418 |         """
419 |         制作遮罩
420 |         :return: 结果
421 |         """
422 |         ans = []
423 |         for year, month in self.months:
424 |             now_rili = ca.generate_calendar(year, month)
425 |             ans.append(now_rili)
426 |         return ans
427 | 
428 |     def process_heat(self, mode):
429 |         """
430 |         分析聊天热度
431 |         :param mode: 分析模式
432 |         :return: 无
433 |         """
434 |         if mode == self.name1 + self.name2:
435 |             date_df = self.all_df.copy()
436 |             title = "两个人"
437 |             pass
438 |         elif mode == self.name1:
439 |             date_df = self.j_df.copy()
440 |             title = mode
441 |             pass
442 |         elif mode == self.name2:
443 |             date_df = self.n_df.copy()
444 |             title = mode
445 |             pass
446 |         else:
447 |             print("参数错误，退出")
448 |             return
449 |         date_df["time"] = pd.to_datetime(date_df["time"])
450 |         date_df["date"] = date_df["time"].dt.date
451 |         s_date, e_date = ca.get_month_dates(
452 |             self.s_year, self.s_month, self.e_year, self.e_month
453 |         )
454 |         date_range = pd.date_range(start=s_date, end=e_date)
455 |         date_counts = date_df.groupby("date").size().reindex(date_range, fill_value=0)
456 |         date_counts = date_counts.to_frame()
457 |         date_counts = date_counts.reset_index()
458 |         date_counts.columns = ["time", "counts"]
459 |         date_counts["week_day"] = date_counts["time"].apply(lambda s: s.weekday() + 1)
460 | 
461 |         date_counts.sort_values(by="time")
462 |         date_counts["month"] = date_counts["time"].dt.month
463 |         date_counts["year"] = date_counts["time"].dt.year
464 |         date_dfs = [group for _, group in date_counts.groupby(["year", "month"])]
465 |         rili_dfs = [self.make_rili_df(df) for df in date_dfs]
466 |         mask = self.make_masks()
467 | 
468 |         date_counts_no_zeros = date_counts[
469 |             date_counts["counts"].apply(lambda s: s != 0)
470 |         ]
471 | 
472 |         self.d.draw_heat_how(date_counts_no_zeros, title, self.max_heat_date)
473 | 
474 |         self.d.draw_heatmap_big(
475 |             rili_dfs,
476 |             title,
477 |             mask,
478 |             self.months_size,
479 |             self.months,
480 |             self.max_heat_date,
481 |         )
482 |         self.d.draw_heatmap_all(
483 |             rili_dfs,
484 |             title,
485 |             mask,
486 |             self.months_size,
487 |             self.months,
488 |             self.max_heat_date,
489 |         )
490 | 
491 |     # 分析聊天时间
492 | 
493 |     def get_max_count_time(self):
494 |         hour_df = self.all_df.copy()
495 |         hour_df["time"] = pd.to_datetime(hour_df["time"])
496 |         hour_df["hour"] = hour_df["time"].dt.hour
497 |         hour_counts = hour_df.groupby("hour").size().reindex(range(0, 24), fill_value=0)
498 |         hour_counts = hour_counts.to_frame()
499 |         hour_counts = hour_counts.reset_index()
500 |         hour_counts.columns = ["hour", "counts"]
501 |         hour_counts.sort_values(by="hour")
502 |         columns_date_df = [str(i) for i in range(0, 24)]
503 |         hour_df_image = pd.DataFrame(columns=columns_date_df)
504 |         temp_row = [0] * 24
505 |         for index, row in hour_counts.iterrows():
506 |             temp_row[index] = row["counts"]
507 |         hour_df_image = pd.concat(
508 |             [hour_df_image, pd.DataFrame([temp_row], columns=columns_date_df)],
509 |             ignore_index=True,
510 |         )
511 |         max_count = hour_df_image.max(axis=1).values[0]
512 |         self.max_count_time = max_count
513 | 
514 |     def process_time(self, mode):
515 |         """
516 |         分析聊天热度时间
517 |         :param mode: 分析模式
518 |         :return: 无
519 |         """
520 |         if mode == self.name1 + self.name2:
521 |             hour_df = self.all_df.copy()
522 |             title = "两个人"
523 |             pass
524 |         elif mode == self.name1:
525 |             hour_df = self.j_df.copy()
526 |             title = mode
527 |             pass
528 |         elif mode == self.name2:
529 |             hour_df = self.n_df.copy()
530 |             title = mode
531 |             pass
532 |         else:
533 |             print("参数错误，退出")
534 |             return
535 |         hour_df["time"] = pd.to_datetime(hour_df["time"])
536 |         hour_df["hour"] = hour_df["time"].dt.hour
537 |         hour_counts = hour_df.groupby("hour").size().reindex(range(0, 24), fill_value=0)
538 |         hour_counts = hour_counts.to_frame()
539 |         hour_counts = hour_counts.reset_index()
540 |         hour_counts.columns = ["hour", "counts"]
541 |         hour_counts.sort_values(by="hour")
542 |         columns_date_df = [str(i) for i in range(0, 24)]
543 |         hour_df_image = pd.DataFrame(columns=columns_date_df)
544 |         temp_row = [0] * 24
545 |         for index, row in hour_counts.iterrows():
546 |             temp_row[index] = row["counts"]
547 |         hour_df_image = pd.concat(
548 |             [hour_df_image, pd.DataFrame([temp_row], columns=columns_date_df)],
549 |             ignore_index=True,
550 |         )
551 |         self.d.draw_time_heat(hour_df_image, title, self.max_count_time)
552 | 
553 |     # 分析情感
554 | 
555 |     def analyse_word(self, s):
556 |         """
557 |         分析情感
558 |         :param s:
559 |         :return:
560 |         """
561 |         # print("-------")
562 |         # print(s)
563 |         sleep(self.QPS)
564 |         result = self.client.sentimentClassify(s)  # 调用api
565 |         # print("-------")
566 |         return result
567 | 
568 |     def save_emotion(self, mode):
569 |         """
570 |         生成情感分析文件
571 |         :return: 无
572 |         """
573 |         if mode == self.name1 + self.name2:
574 |             emo_df = self.all_df_clean.copy()
575 |             title = "全部"
576 |             pass
577 |         elif mode == self.name1:
578 |             emo_df = self.j_df_clean.copy()
579 |             title = mode
580 |             pass
581 |         elif mode == self.name2:
582 |             emo_df = self.n_df_clean.copy()
583 |             title = mode
584 |             pass
585 |         else:
586 |             print("参数错误，退出")
587 |             return
588 |         emo_df["emo"] = emo_df["data"].apply(self.analyse_word)
589 |         path = "./用户数据/data/" + title + "情感分析.xlsx"
590 |         emo_df.to_excel(path, index=False)
591 | 
592 |     def is_items(self, s):
593 |         """
594 |         清洗错误数据
595 |         :param s: 文本
596 |         :return: 是否正确
597 |         """
598 |         return "items" in s
599 | 
600 |     def get_sentiment(self, s):
601 |         """
602 |         获取倾向
603 |         :param s:结论
604 |         :return: 倾向
605 |         """
606 |         match = search(r"'sentiment': (\d+)", s)
607 |         return int(match.group(1)) if match else None
608 | 
609 |     def get_api(self, q, a1, a2, a3):
610 |         QPS = int(q)
611 |         if QPS >= 20:
612 |             self.QPS = 0
613 |         else:
614 |             self.QPS = 1.0 / QPS
615 |         self.a1 = a1
616 |         self.a2 = a2
617 |         self.a3 = a3
618 | 
619 |     def process_emo(self, mode):
620 |         """
621 |         分析情感
622 |         :param mode: 分析模式
623 |         :return: 无
624 |         """
625 |         if mode == self.name1 + self.name2:
626 |             title = "全部"
627 |             pass
628 |         elif mode == self.name1:
629 |             title = self.name1
630 |             pass
631 |         elif mode == self.name2:
632 |             title = self.name2
633 |             pass
634 |         else:
635 |             print("参数错误，退出")
636 |             return
637 |         App_ID = self.a1
638 |         API_KEY = self.a2
639 |         SECRET_KEY = self.a3
640 |         api_path = "./用户数据/api/api.txt"
641 |         text = App_ID + "\n" + API_KEY + "\n" + SECRET_KEY + "\n"
642 |         with open(api_path, "w") as file:
643 |             file.write(text)
644 |         self.client = AipNlp(App_ID, API_KEY, SECRET_KEY)
645 |         print(App_ID)
646 |         print(API_KEY)
647 |         print(SECRET_KEY)
648 |         self.save_emotion(mode)
649 |         path = "./用户数据/data/" + title + "情感分析.xlsx"
650 |         emo_df = pd.read_excel(path)
651 |         emo_df = emo_df[emo_df["emo"].apply(self.is_items)]
652 |         emo_df["emo_rank"] = emo_df["emo"].apply(self.get_sentiment)
653 |         emo_rank_counts = (
654 |             emo_df.groupby("emo_rank").size().reindex(range(0, 3), fill_value=0)
655 |         )
656 |         emo_rank_counts = emo_rank_counts.to_frame()
657 |         emo_rank_counts = emo_rank_counts.reset_index()
658 |         emo_rank_counts.columns = ["rank", "counts"]
659 |         emo_rank_counts.sort_values(by="rank")
660 |         self.d.draw_emo(emo_rank_counts, title)
661 | 
662 |     # 保存数据
663 |     def save_kinds_of_data(self):
664 |         """
665 |         保存数据
666 |         :return:无
667 |         """
668 |         d_data = [self.n_df.copy(), self.j_df.copy(), self.all_df.copy()]
669 |         d_path = [
670 |             "./用户数据/data/" + self.name2 + ".xlsx",
671 |             "./用户数据/data/" + self.name1 + ".xlsx",
672 |             "./用户数据/data/all.xlsx",
673 |         ]
674 |         self.sa.save_data_all(d_data, d_path)
675 | 


--------------------------------------------------------------------------------
/仓耳与墨 W03.TTF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/仓耳与墨 W03.TTF


--------------------------------------------------------------------------------
/使用教程/使用教程.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/使用教程/使用教程.pdf


--------------------------------------------------------------------------------
/使用教程/词云形状自定义教程.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/使用教程/词云形状自定义教程.pdf


--------------------------------------------------------------------------------