├── .gitignore ├── LICENSE ├── README.md ├── calenda.py ├── console.py ├── data_process.py ├── draw.py ├── getMsg.py ├── icon ├── icon.ico └── icon.png ├── logo ├── logo.png └── logo2.png ├── main.py ├── save.py ├── show_gui.py ├── solve.py ├── 仓耳与墨 W03.TTF └── 使用教程 ├── 使用教程.pdf └── 词云形状自定义教程.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | .idea/ 161 | /data/ 162 | /聊天记录/ 163 | /test.py 164 | /api/ 165 | /橙子的聊天记录分析器.zip 166 | /用户数据/ 167 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 OOrangeeee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 微信聊天记录分析 2 | 3 | >写在前面:本程序是本人(在读大二学生)利用课余时间三天内写完的送给我女朋友的小惊喜,碍于课业压力,时间不足和能力有限本程序还有很多漏洞和缺点,任何问题都可以联系我(联系方式写在代码头了),有时间我会更新的,还希望大佬勿喷。 4 | 5 | **详细的使用教程都在"使用教程"文件夹中。** 6 | 7 | ## 软件概述 8 | 9 | >本软件专注于对已获取的微信聊天数据进行深入分析。 10 | 为了使用本软件的数据分析功能,您首先需要通过专业人士开发的“留痕”软件来爬取微信聊天记录并导出为csv格式。最后的板块内我分享了留痕的一个版本供大家使用并且分享了其仓库链接,请大家多多支持大佬的工作。一旦完成了微信数据的爬取和解密,便可将这些数据导入到我们的软件中,进行全面而详细的分析。 11 | 我的软件设计用以洞察和解读微信聊天记录,为用户提供深入的数据洞见。(特别适合情侣之间分析聊天记录 12 | 13 | ***注:如果想直接使用程序,请直接跳到最后,查看“运行程序”板块。爬取方式也在那里。生成的图片直接查看或许会失真(尤其是总体的热力图,plt的锅),请在文件夹中查看,是完全正常的。(图片的保存路径是./用户数据/data/src,用户数据文件夹就在exe的同级目录里)*** 14 | 15 | 本软件致力于保障用户隐私,所有源代码均可供查阅,确保用户信息及聊天记录的安全性和保密性。我们以橙子和柠檬的聊天数据为例,展现了本软件的数据分析能力。若需更改分析对象,用户可在源代码中搜索并替换相应的名字。本软件提供的数据分析功能包括: 16 | 17 | 1. **聊天记录词频分析**: 18 | 1. 对两人聊天记录中的高频词进行统计,生成三张精美的词云图(橙子一张,柠檬一张,以及二人共同的一张)。并且可以自定义词云形状。 19 | 2. **Emoji使用分析**: 20 | 1. 统计两人最常用的emoji,并通过柱状图进行比较展示。 21 | 3. **表情包使用分析**: 22 | 1. 分类统计两人使用的表情包种类,并以柱状图形式展示每人的种类数。 23 | 2. 比较两人使用表情包的数量,并通过柱状图进行展示。 24 | 3. 分析并对比两人使用不同表情包的频率,以柱状图形式展示。 25 | 4. **聊天热度分析**: 26 | 1. 生成每月的日历形式热力图,分别针对橙子和柠檬,以及他们共同的记录。 27 | 2. 统计并展示同一时期内的聊天热度变化,以折线图形式展示。 28 | 5. **聊天时间分析**: 29 | 1. 分析一天24小时内两人的聊天活跃度,并生成相应的热力图(橙子一张,柠檬一张,以及共同的一张)。 30 | 6. **聊天情感分析**: 31 | 1. 利用百度智能云的语句情感分析API,对聊天记录进行情感分析,并生成三张饼图(橙子一张,柠檬一张,以及共同的一张)。 32 | 33 | 所有分析结果和数据均可在根目录下的**用户数据**文件夹中查阅。这些功能旨在为用户提供深入、全面的聊天数据分析,帮助用户更好地理解和存储宝贵的交流信息。 34 | 35 | ## 更新说明 36 | 37 | **在2.0版本的重大更新后,程序现已实现全面优化,完美适配各类聊天记录。用户能够灵活地输入自己的姓名,并分析任意时间段的聊天内容,不再受限于之前的四个月时间限制。此外,程序还提供了便捷的情感分析功能选择,用户可根据需要轻松输入API相关信息进行分析。总的来说,经过全面升级的2.0版本,终于成为了一个真正优秀的产品。这一过程告诫我们,优化代码的重要性不容忽视,希望大家能从中汲取教训,千万不要把代码写死。** 38 | 39 | **由于大家很喜欢本程序,故本人抽出时间更新了3.0版本。本版本完全实现程序的可视化(终于不再是该死的命令行了),并且大家可以自由选择词云的形状。** 40 | 41 | ## 环境配置 42 | 43 | 关于软件环境的配置,我有几点建议想与你分享。首先,我不建议你完全依赖导出的环境配置来搭建自己的系统。在实际操作中,**根据自己的需求逐步补充所缺少的组件往往是一个更为高效和灵活的方法**。原因很简单:每个人的需求和环境都是独一无二的,盲目跟随他人的配置可能并不适合你。 44 | 45 | 至于为什么不提供一个自动化的环境配置程序,诚实地说,这主要是因为我有些懒惰。编写这样的程序需要花费大量的时间和精力,而且往往难以满足所有人的需求。 46 | 47 | 如果你不想深入自定义名称、月份等参数(这需要修改代码,尽管只是几个参数,但前提是你需要有足够的专业知识来阅读和理解代码),你可以选择使用我预先打包好的程序。我已经尽可能地将其配置得简单易用,希望能够满足你的基本需求。 48 | 49 | 感谢你的理解和支持!希望我的建议对你有所帮助。 50 | 51 | ``` bash 52 | # This file may be used to create an environment using: 53 | # $ conda create --name --file 54 | # platform: win-64 55 | altgraph=0.17.3=py39haa95532_0 56 | blas=1.0=mkl 57 | bottleneck=1.3.5=py39h080aedc_0 58 | brotli=1.0.9=h2bbff1b_7 59 | brotli-bin=1.0.9=h2bbff1b_7 60 | ca-certificates=2023.12.12=haa95532_0 61 | contourpy=1.2.0=py39h59b6b97_0 62 | cycler=0.11.0=pyhd3eb1b0_0 63 | et_xmlfile=1.1.0=py39haa95532_0 64 | fonttools=4.25.0=pyhd3eb1b0_0 65 | freetype=2.12.1=ha860e81_0 66 | future=0.18.3=py39haa95532_0 67 | giflib=5.2.1=h8cc25b3_3 68 | icc_rt=2022.1.0=h6049295_2 69 | icu=73.1=h6c2663c_0 70 | importlib_resources=6.1.1=py39haa95532_1 71 | intel-openmp=2023.1.0=h59b6b97_46320 72 | joblib=1.2.0=py39haa95532_0 73 | jpeg=9e=h2bbff1b_1 74 | kiwisolver=1.4.4=py39hd77b12b_0 75 | krb5=1.20.1=h5b6d351_0 76 | lerc=3.0=hd77b12b_0 77 | libbrotlicommon=1.0.9=h2bbff1b_7 78 | libbrotlidec=1.0.9=h2bbff1b_7 79 | libbrotlienc=1.0.9=h2bbff1b_7 80 | libclang=14.0.6=default_hb5a9fac_1 81 | libclang13=14.0.6=default_h8e68704_1 82 | libdeflate=1.17=h2bbff1b_1 83 | libpng=1.6.39=h8cc25b3_0 84 | libpq=12.15=h906ac69_1 85 | libtiff=4.5.1=hd77b12b_0 86 | libwebp=1.3.2=hbc33d0d_0 87 | libwebp-base=1.3.2=h2bbff1b_0 88 | lz4-c=1.9.4=h2bbff1b_0 89 | matplotlib=3.8.0=py39haa95532_0 90 | matplotlib-base=3.8.0=py39h4ed8f06_0 91 | mkl=2023.1.0=h6b88ed4_46358 92 | mkl-service=2.4.0=py39h2bbff1b_1 93 | mkl_fft=1.3.8=py39h2bbff1b_0 94 | mkl_random=1.2.4=py39h59b6b97_0 95 | munkres=1.1.4=py_0 96 | numexpr=2.8.7=py39h2cd9be0_0 97 | numpy=1.26.3=py39h055cbcc_0 98 | numpy-base=1.26.3=py39h65a83cf_0 99 | openjpeg=2.4.0=h4fc8c34_0 100 | openpyxl=3.0.10=py39h2bbff1b_0 101 | openssl=3.0.12=h2bbff1b_0 102 | packaging=23.1=py39haa95532_0 103 | pandas=1.2.4=pypi_0 104 | pefile=2022.5.30=py39haa95532_0 105 | pillow=10.0.1=py39h045eedc_0 106 | pip=23.3.1=py39haa95532_0 107 | ply=3.11=py39haa95532_0 108 | pyinstaller=5.13.2=py39h2bbff1b_0 109 | pyinstaller-hooks-contrib=2022.14=py39haa95532_0 110 | pyparsing=3.0.9=py39haa95532_0 111 | pyqt=5.15.10=py39hd77b12b_0 112 | pyqt5-sip=12.13.0=py39h2bbff1b_0 113 | python=3.9.18=h1aa4202_0 114 | python-dateutil=2.8.2=pyhd3eb1b0_0 115 | python-tzdata=2023.3=pyhd3eb1b0_0 116 | pytz=2023.3.post1=py39haa95532_0 117 | pywin32=305=py39h2bbff1b_0 118 | pywin32-ctypes=0.2.0=py39haa95532_1000 119 | qt-main=5.15.2=h19c9488_10 120 | scikit-learn=1.3.0=py39h4ed8f06_0 121 | scipy=1.11.4=py39h309d312_0 122 | seaborn=0.12.2=py39haa95532_0 123 | setuptools=68.2.2=py39haa95532_0 124 | sip=6.7.12=py39hd77b12b_0 125 | six=1.16.0=pyhd3eb1b0_1 126 | sqlite=3.41.2=h2bbff1b_0 127 | tbb=2021.8.0=h59b6b97_0 128 | threadpoolctl=2.2.0=pyh0d69192_0 129 | tk=8.6.12=h2bbff1b_0 130 | tomli=2.0.1=py39haa95532_0 131 | tornado=6.3.3=py39h2bbff1b_0 132 | tzdata=2023d=h04d1e81_0 133 | vc=14.2=h21ff451_1 134 | vs2015_runtime=14.27.29016=h5e58377_2 135 | wheel=0.41.2=py39haa95532_0 136 | wordcloud=1.9.2=py39h2bbff1b_0 137 | xz=5.4.5=h8cc25b3_0 138 | zipp=3.17.0=py39haa95532_0 139 | zlib=1.2.13=h8cc25b3_0 140 | zstd=1.5.5=hd43e919_0 141 | absl-py==0.15.0 142 | altgraph @ file:///C:/b/abs_f2edualeyv/croot/altgraph_1670426107695/work 143 | astunparse==1.6.3 144 | baidu-aip==4.16.13 145 | Bottleneck @ file:///C:/Windows/Temp/abs_3198ca53-903d-42fd-87b4-03e6d03a8381yfwsuve8/croots/recipe/bottleneck_1657175565403/work 146 | cachetools==5.3.2 147 | certifi==2023.11.17 148 | chardet==3.0.4 149 | charset-normalizer==3.3.2 150 | clang==5.0 151 | click==8.1.7 152 | colorama==0.4.6 153 | contourpy @ file:///C:/b/abs_853rfy8zse/croot/contourpy_1700583617587/work 154 | cycler @ file:///tmp/build/80754af9/cycler_1637851556182/work 155 | et-xmlfile==1.1.0 156 | filelock==3.9.0 157 | fire==0.5.0 158 | flatbuffers==1.12 159 | fonttools==4.25.0 160 | fsspec==2023.4.0 161 | future @ file:///C:/b/abs_3dcibf18zi/croot/future_1677599891380/work 162 | gast==0.4.0 163 | google-auth==2.25.2 164 | google-auth-oauthlib==1.2.0 165 | google-pasta==0.2.0 166 | grpcio==1.60.0 167 | h11==0.9.0 168 | h2==3.2.0 169 | h5py==3.1.0 170 | hpack==3.0.0 171 | hstspreload==2024.1.5 172 | httpcore==0.9.1 173 | httpx==0.13.3 174 | hyperframe==5.2.0 175 | icon-font-to-png==0.4.1 176 | idna==2.10 177 | importlib-metadata==7.0.0 178 | importlib-resources @ file:///C:/b/abs_d0dmp77t95/croot/importlib_resources-suite_1704281892795/work 179 | install==1.3.5 180 | jieba==0.42.1 181 | Jinja2==3.1.2 182 | joblib==1.3.2 183 | Keras-Preprocessing==1.1.2 184 | kiwisolver @ file:///C:/b/abs_88mdhvtahm/croot/kiwisolver_1672387921783/work 185 | libclang==16.0.6 186 | libretranslatepy==2.1.1 187 | lxml==5.1.0 188 | Markdown==3.5.1 189 | MarkupSafe==2.1.3 190 | matplotlib @ file:///C:/b/abs_e26vnvd5s1/croot/matplotlib-suite_1698692153288/work 191 | mkl-fft @ file:///C:/b/abs_19i1y8ykas/croot/mkl_fft_1695058226480/work 192 | mkl-random @ file:///C:/b/abs_edwkj1_o69/croot/mkl_random_1695059866750/work 193 | mkl-service==2.4.0 194 | mpmath==1.3.0 195 | munkres==1.1.4 196 | networkx==3.0 197 | numexpr @ file:///C:/b/abs_5fucrty5dc/croot/numexpr_1696515448831/work 198 | numpy @ file:///C:/b/abs_16b2j7ad8n/croot/numpy_and_numpy_base_1704311752418/work/dist/numpy-1.26.3-cp39-cp39-win_amd64.whl#sha256=02e606e23ca31bb00a40d147fd1ce4dd7d241395346a4196592d5abe54a333bc 199 | oauthlib==3.2.2 200 | openpyxl==3.0.10 201 | opt-einsum==3.3.0 202 | packaging @ file:///C:/b/abs_28t5mcoltc/croot/packaging_1693575224052/work 203 | palettable==3.3.3 204 | pandas==1.2.4 205 | pefile @ file:///C:/b/abs_feg_7trsni/croot/pefile_1670877329726/work 206 | Pillow==9.3.0 207 | ply==3.11 208 | pyasn1==0.5.1 209 | pyasn1-modules==0.3.0 210 | pyinstaller @ file:///C:/b/abs_b94gi_3vjm/croot/pyinstaller_1703109616045/work 211 | pyinstaller-hooks-contrib @ file:///C:/b/abs_c2hemrb3nh/croot/pyinstaller-hooks-contrib_1670877320457/work 212 | pyparsing @ file:///C:/Users/BUILDE~1/AppData/Local/Temp/abs_7f_7lba6rl/croots/recipe/pyparsing_1661452540662/work 213 | PyQt5==5.15.10 214 | PyQt5-sip @ file:///C:/b/abs_c0pi2mimq3/croot/pyqt-split_1698769125270/work/pyqt_sip 215 | python-dateutil==2.8.2 216 | pytz==2023.3.post1 217 | pywin32==305.1 218 | pywin32-ctypes @ file:///C:/ci/pywin32-ctypes_1607553594546/work 219 | requests==2.31.0 220 | requests-oauthlib==1.3.1 221 | rfc3986==1.5.0 222 | rsa==4.9 223 | scikit-learn==1.3.2 224 | scipy==1.11.4 225 | seaborn @ file:///C:/b/abs_68ltdkoyoo/croot/seaborn_1673479199997/work 226 | sip @ file:///C:/b/abs_edevan3fce/croot/sip_1698675983372/work 227 | six==1.15.0 228 | sniffio==1.3.0 229 | stylecloud==0.5.2 230 | sympy==1.12 231 | tensorboard==2.15.1 232 | tensorboard-data-server==0.7.2 233 | tensorflow-estimator==2.15.0 234 | tensorflow-gpu==2.6.0 235 | tensorflow-io-gcs-filesystem==0.31.0 236 | termcolor==1.1.0 237 | threadpoolctl==3.2.0 238 | tinycss==0.4 239 | tomli @ file:///C:/Windows/TEMP/abs_ac109f85-a7b3-4b4d-bcfd-52622eceddf0hy332ojo/croots/recipe/tomli_1657175513137/work 240 | torch==2.1.2+cu118 241 | torchaudio==2.1.2+cu118 242 | torchvision==0.16.2+cu118 243 | tornado @ file:///C:/b/abs_0cbrstidzg/croot/tornado_1696937003724/work 244 | translate==3.6.1 245 | typing-extensions==3.7.4.3 246 | tzdata==2023.3 247 | urllib3==2.1.0 248 | Werkzeug==3.0.1 249 | wordcloud @ file:///C:/b/abs_66ccn47hik/croot/wordcloud_1687301655958/work 250 | wrapt==1.12.1 251 | zipp==3.17.0 252 | ``` 253 | 254 | 如需配置环境可参考上方文档。 255 | 256 | ## 运行代码 257 | 258 | clone后在clone的文件夹下打开cmd,输入以下代码: 259 | 260 | ```bash 261 | python 262 | main.py 263 | ``` 264 | 265 | 随后根据软件提示使用即可。 266 | 267 | ## 运行程序 268 | 269 | 如果有不懂代码的朋友想使用此软件,本软件也贴心的准备好了打包好的程序,只需要前往此链接,下载压缩包后解压根目录下的**橙子的聊天记录分析器**压缩包,进入**橙子的聊天记录分析器**文件夹中启动**橙子的聊天记录分析器.exe**即可。 270 | 271 | **请注意,此百度网盘链接里还有完整的使用教程,大家可以阅读一下** 272 | 273 | 为了帮助大家使用,其中还包含了大佬的爬取微信聊天记录的软件(留痕),请大家多多支持大佬的工作。 274 | 275 | ```bash 276 | 链接:https://pan.baidu.com/s/1zRRj7sT3snTyQsQFfwD_RA?pwd=0714 277 | 提取码:0714 278 | --来自百度网盘超级会员V6的分享 279 | ``` 280 | 281 | 欢迎大家使用此软件分析聊天记录。 282 | 283 | 另外为了方便大家爬取解密微信聊天记录在此附上大佬的git仓库,用于爬取数据: 284 | 285 | ```bash 286 | https://github.com/LC044/WeChatMsg 287 | ``` 288 | 289 | 2024.1.20 橙子 290 | 291 | >2024.1.25更新 292 | >2024.1.26更新 293 | >2024.1.27更新 294 | >2024.1.30更新 295 | >2024.2.3更新 296 | >2024.2.7更新 -------------------------------------------------------------------------------- /calenda.py: -------------------------------------------------------------------------------- 1 | # 最后编辑: 2 | # 晋晨曦 2024.1.26 15.46 3 | # qq:2950171570 4 | # email:Jin0714@outlook.com 回复随缘 5 | from calendar import monthrange 6 | from numpy import ones 7 | 8 | 9 | def generate_calendar(year, month): 10 | """ 11 | 返回日历样式的numpy数组 12 | :param year: 年份 13 | :param month: 月份 14 | :return: numpy数组 15 | """ 16 | first_day_weekday, month_days = monthrange(year, month) 17 | rows_needed = ((first_day_weekday + month_days - 1) // 7) + 1 18 | calendar_array = ones((rows_needed, 7), dtype=int) 19 | day_counter = 1 20 | 21 | for week in range(rows_needed): 22 | for day in range(7): 23 | if week > 0 or day >= first_day_weekday: 24 | if day_counter <= month_days: 25 | calendar_array[week][day] = 0 26 | day_counter += 1 27 | 28 | return calendar_array 29 | 30 | 31 | def get_month_dates(year1, month1, year2, month2): 32 | """ 33 | 返回时间范围 34 | :param year1: 开始年 35 | :param month1: 开始月 36 | :param year2: 结束年 37 | :param month2: 结束月 38 | :return: 时间范围 39 | """ 40 | first_date = f"{year1}-{str(month1).zfill(2)}-01" 41 | 42 | last_day = monthrange(year2, month2)[1] 43 | last_date = f"{year2}-{str(month2).zfill(2)}-{last_day}" 44 | 45 | return first_date, last_date 46 | -------------------------------------------------------------------------------- /console.py: -------------------------------------------------------------------------------- 1 | from ctypes import WinDLL 2 | 3 | 4 | def no_con(): 5 | kernel32 = WinDLL("kernel32") 6 | user32 = WinDLL("user32") 7 | HWND = kernel32.GetConsoleWindow() 8 | if HWND != 0: 9 | user32.ShowWindow(HWND, 0) 10 | -------------------------------------------------------------------------------- /data_process.py: -------------------------------------------------------------------------------- 1 | # 最后编辑: 2 | # 晋晨曦 2024.1.26 15:46 3 | # qq:2950171570 4 | # email:Jin0714@outlook.com 回复随缘 5 | from re import sub 6 | 7 | 8 | def process_data(df): 9 | """ 10 | 处理数据 11 | :param df:原始数据 12 | :return:处理好的数据 13 | """ 14 | 15 | # 提取数据 16 | columns = ["StrTime", "StrContent", "IsSender"] 17 | df = df[columns].copy() 18 | df.rename(columns={"StrTime": "time", "StrContent": "data"}, inplace=True) 19 | 20 | # 清洗数据 21 | df = df.dropna(subset=["data"]) 22 | df_x = df.copy() 23 | df["data"] = df["data"].astype(str) 24 | 25 | # 分类数据 26 | is_sender_1 = df["IsSender"] == 1 27 | is_sender_0 = df["IsSender"] == 0 28 | j_df = df[is_sender_1].copy() 29 | n_df = df[is_sender_0].copy() 30 | j_df = j_df.drop("IsSender", axis=1) 31 | n_df = n_df.drop("IsSender", axis=1) 32 | df_x = df_x.drop("IsSender", axis=1) 33 | 34 | df_x = df_x[df_x["data"].apply(not_start_with_msg)] 35 | df_x["data"] = df_x["data"].apply(remove_bracketed_text_and_count_all) 36 | df_x = df_x[df_x["data"].apply(len) > 0] 37 | 38 | j_df["time"] = j_df["time"].str.replace("/", "-") 39 | n_df["time"] = n_df["time"].str.replace("/", "-") 40 | df_x["time"] = df_x["time"].str.replace("/", "-") 41 | 42 | j_df = j_df.reset_index(drop=True) 43 | n_df = n_df.reset_index(drop=True) 44 | df_x = df_x.reset_index(drop=True) 45 | 46 | return j_df, n_df, df_x 47 | 48 | 49 | def not_start_with_msg(value): 50 | """ 51 | 判断是否不以<开头 52 | :param value: 文本 53 | :return: 不以<开头为true,否则flase 54 | """ 55 | return not value.startswith("<") 56 | 57 | 58 | def remove_bracketed_text_and_count_all(s): 59 | """ 60 | 删除所有聊天记录[]中文字 61 | :param s:语句 62 | :return:删除后的语句 63 | """ 64 | return sub(r"\[.*?\]", "", s) 65 | -------------------------------------------------------------------------------- /draw.py: -------------------------------------------------------------------------------- 1 | # 最后编辑: 2 | # 晋晨曦 2024.2.2 17.13 3 | # qq:2950171570 4 | # email:Jin0714@outlook.com 回复随缘 5 | import matplotlib.pyplot as plt 6 | from pandas import Series 7 | from stylecloud import gen_stylecloud 8 | from os import path 9 | from os import remove 10 | from seaborn import heatmap 11 | from math import ceil 12 | 13 | 14 | class draw_data: 15 | def __init__(self, name1, name2): 16 | """ 17 | 构造函数,初始化一些可能需要的属性。 18 | """ 19 | self.name1 = name1 20 | self.name2 = name2 21 | pass 22 | 23 | def __str__(self): 24 | """ 25 | 字符串表示,用于打印对象时提供有用的信息。 26 | """ 27 | return "draw_data类实例,用于可视化数据" 28 | pass 29 | 30 | # 绘制emoji 31 | 32 | def split_dict(self, original_dict, size): 33 | """ 34 | 划分字典 35 | :param original_dict: 初始字典 36 | :param size: 划分数量 37 | :return: 一个列表,包含划分后的字典 38 | """ 39 | keys = list(original_dict.keys()) 40 | split_dicts = [] 41 | 42 | for i in range(0, len(keys), size): 43 | subset_keys = keys[i : i + size] 44 | new_dict = {key: original_dict[key] for key in subset_keys} 45 | split_dicts.append(new_dict) 46 | 47 | return split_dicts 48 | 49 | def draw_emoji(self, dict1, dict2, max_count): 50 | """ 51 | 批量画emoji图的驱动函数 52 | :param dict1:name1emoji字典 53 | :param dict2:name2emoji字典 54 | :return:无 55 | """ 56 | length = len(dict1) 57 | num_20 = length // 20 58 | if length % 20 != 0: 59 | num_20 += 1 60 | j = self.split_dict(dict1, 20) 61 | n = self.split_dict(dict2, 20) 62 | for x in range(num_20): 63 | self.draw_emoji_tool(j[x], n[x], x, max_count) 64 | 65 | def draw_emoji_tool(self, dict1, dict2, num, max_count): 66 | """ 67 | 画emoji图的工作函数 68 | :param dict1:name1emoji字典 69 | :param dict2:name2emoji字典 70 | :param num:第几个图 71 | :return:无 72 | """ 73 | # 提取键和值 74 | 75 | keys = list(dict1.keys()) 76 | values1 = list(dict1.values()) 77 | values2 = list(dict2.values()) 78 | 79 | plt.figure(figsize=(10, 6)) 80 | 81 | x = range(len(keys)) 82 | width = 0.35 83 | 84 | plt.ylim(0, max_count) 85 | plt.bar( 86 | [i - width / 2 for i in x], 87 | values1, 88 | width=width, 89 | label=self.name1, 90 | color="orange", 91 | edgecolor="black", 92 | ) 93 | plt.bar( 94 | [i + width / 2 for i in x], 95 | values2, 96 | width=width, 97 | label=self.name2, 98 | color="yellow", 99 | edgecolor="black", 100 | ) 101 | 102 | title = "emoji统计!num " + str(num + 1) + " !" 103 | plt.xlabel("emoji类型") 104 | plt.ylabel("频率") 105 | plt.title(title) 106 | plt.xticks(x, keys) 107 | plt.legend() 108 | 109 | filepath = "./用户数据/data/src/emoji/" + title + ".png" 110 | plt.savefig(filepath, format="png") 111 | 112 | plt.tight_layout() 113 | 114 | # 绘制表情包 115 | 116 | def union_bqb(self, j_df, n_df): 117 | """ 118 | 扩写df 119 | :param j_df: 晋晨曦df 120 | :param n_df: 宁静df 121 | :return: 返回结果 122 | """ 123 | data_union = Series(list(set(j_df["data"]).union(set(n_df["data"])))) 124 | 125 | df1_extended = data_union.to_frame(name="data").merge( 126 | j_df, on="data", how="left" 127 | ) 128 | df2_extended = data_union.to_frame(name="data").merge( 129 | n_df, on="data", how="left" 130 | ) 131 | 132 | df1_extended["count"].fillna(0, inplace=True) 133 | df2_extended["count"].fillna(0, inplace=True) 134 | 135 | df1_extended.sort_values(by="data", inplace=True) 136 | df2_extended.sort_values(by="data", inplace=True) 137 | return df1_extended, df2_extended 138 | 139 | def split_dataframe(self, df, n_parts): 140 | """ 141 | 划分表情包 142 | :param df:划分对象 143 | :param n_parts:划分几部分 144 | :return:划分后的df列表 145 | """ 146 | part_size = len(df) // n_parts 147 | 148 | split_dfs = [df[i * part_size : (i + 1) * part_size] for i in range(n_parts)] 149 | 150 | return split_dfs 151 | 152 | def draw_bqb(self, bqb_j, bqb_n, max_count): 153 | """ 154 | 画表情包的图 155 | :param bqb_j: name1表情包df 156 | :param bqb_n: name2表情包df 157 | :return: 无 158 | """ 159 | # 画种类 160 | self.draw_bqb_kinds(bqb_j, bqb_n) 161 | # 画数量 162 | self.draw_bqb_count(bqb_j, bqb_n, max_count) 163 | # 细分画 164 | bqb_j, bqb_n = self.union_bqb(bqb_j, bqb_n) 165 | count = len(bqb_n) // 28 166 | if len(bqb_n) % 28 != 0: 167 | count += 1 168 | dfs_j = self.split_dataframe(bqb_j, count) 169 | dfs_l = self.split_dataframe(bqb_n, count) 170 | for i in range(count): 171 | self.draw_bqb_details(dfs_j[i], dfs_l[i], i, max_count) 172 | 173 | def draw_bqb_kinds(self, df1, df2): 174 | """ 175 | 画图表情包种类 176 | :param df1:name1df 177 | :param df2:name2df 178 | :return:无 179 | """ 180 | unique_count_df1 = df1["data"].nunique() 181 | unique_count_df2 = df2["data"].nunique() 182 | 183 | labels = [self.name1, self.name2] 184 | counts = [unique_count_df1, unique_count_df2] 185 | 186 | plt.figure(figsize=(10, 6)) 187 | 188 | plt.bar(labels, counts, color=["orange", "yellow"], edgecolor="black") 189 | 190 | title = "表情包种数统计!" 191 | plt.title(title) 192 | plt.xlabel("对象") 193 | plt.ylabel("用的表情包种数") 194 | filepath = "./用户数据/data/src/表情包/" + title + ".png" 195 | plt.savefig(filepath, format="png") 196 | 197 | def draw_bqb_count(self, df1, df2, max_count): 198 | """ 199 | 表情包数目 200 | :param df1:name1数目 201 | :param df2:name2数目 202 | :return: 203 | """ 204 | total_count_df1 = df1["count"].sum() 205 | total_count_df2 = df2["count"].sum() 206 | 207 | labels = [self.name1, self.name2] 208 | counts = [total_count_df1, total_count_df2] 209 | 210 | plt.figure(figsize=(10, 6)) 211 | 212 | plt.bar(labels, counts, color=["orange", "yellow"], edgecolor="black") 213 | 214 | title = "表情包数量统计!" 215 | plt.title(title) 216 | plt.xlabel("对象") 217 | plt.ylabel("用的表情包数量") 218 | filepath = "./用户数据/data/src/表情包/" + title + ".png" 219 | plt.savefig(filepath, format="png") 220 | 221 | def draw_bqb_details(self, df1, df2, num, max_count): 222 | """ 223 | 画表情包图 224 | :param df1: name1df子集 225 | :param df2: name2df子集 226 | :param num: 第几个 227 | :return: 无 228 | """ 229 | index_data = list(df1["data"]) 230 | 231 | counts_df1 = { 232 | data: df1[df1["data"] == data]["count"].sum() for data in index_data 233 | } 234 | counts_df2 = { 235 | data: df2[df2["data"] == data]["count"].sum() for data in index_data 236 | } 237 | 238 | x_labels = range(1, len(index_data) + 1) 239 | 240 | counts1 = [counts_df1.get(data, 0) for data in index_data] 241 | counts2 = [counts_df2.get(data, 0) for data in index_data] 242 | 243 | plt.figure(figsize=(20, 6)) 244 | 245 | plt.ylim(0, max_count) 246 | 247 | plt.bar( 248 | [x + 0.05 for x in x_labels], 249 | counts1, 250 | color="orange", 251 | width=0.3, 252 | label=self.name1, 253 | edgecolor="black", 254 | ) 255 | plt.bar( 256 | [x + 0.35 for x in x_labels], 257 | counts2, 258 | color="yellow", 259 | width=0.3, 260 | label=self.name2, 261 | edgecolor="black", 262 | ) 263 | 264 | title = "不同表情包使用频率 num " + str(num + 1) + " !" 265 | plt.title(title) 266 | plt.xlabel("表情包编号") 267 | plt.ylabel("频率") 268 | 269 | plt.xticks([x + 0.2 for x in x_labels], [str(x) for x in x_labels]) 270 | 271 | plt.legend() 272 | 273 | filepath = "./用户数据/data/src/表情包/" + title + ".png" 274 | plt.savefig(filepath, format="png") 275 | 276 | # 绘制词云 277 | 278 | def draw_word_cloud(self, df, shape, mode): 279 | """ 280 | 画词云 281 | :param df: 数据 282 | :param shape: 形状 283 | :param mode: 模式 284 | :return: 无 285 | """ 286 | if len(df) >= 200: 287 | df_using = df.head(200) 288 | else: 289 | df_using = df 290 | f_path = "temp.csv" 291 | df_using.to_csv(f_path, index=False) 292 | output_path = "./用户数据/data/src/word/" + mode + "词云.png" 293 | gen_stylecloud( 294 | file_path=f_path, 295 | size=1920, 296 | icon_name=shape, 297 | palette="colorbrewer.diverging.Spectral_11", 298 | background_color="black", 299 | max_words=len(df_using), 300 | max_font_size=120, 301 | font_path="仓耳与墨 W03.TTF", 302 | output_name=output_path, 303 | ) 304 | if path.exists(f_path): 305 | remove(f_path) 306 | else: 307 | print("完蛋") 308 | 309 | # 绘制热力图和变化趋势 310 | 311 | def draw_heatmap_all(self, rili_dfs, title, masks, length, months, max_count): 312 | """ 313 | 统调子图函数 314 | :param rili_dfs: 日历df 315 | :param title: 标题 316 | :param masks: 遮罩 317 | :return: 无 318 | """ 319 | for i in range(0, length): 320 | self.draw_heatmap_small(rili_dfs[i], title, masks[i], months[i], max_count) 321 | pass 322 | 323 | def draw_heatmap_small(self, rili_df, title, mask, month, max_count): 324 | """ 325 | 画小图像 326 | :param rili_df:日历 327 | :param title:标题 328 | :param mask:遮罩 329 | :param month:月份 330 | :return: 331 | """ 332 | year = month[0] 333 | time = month[1] 334 | plt.figure(figsize=(5, 5)) 335 | plt.title(title + "的聊天热力图 " + str(year) + " " + str(time) + "月 版!!") 336 | data = rili_df 337 | data = data.fillna(0) 338 | data = data.astype(int) 339 | heatmap( 340 | data=data, 341 | mask=mask, 342 | vmax=max_count, 343 | vmin=0, 344 | cmap="YlOrRd", 345 | linewidths=0.5, 346 | linecolor="white", 347 | cbar=True, 348 | cbar_kws={"label": "信息条数"}, 349 | ) 350 | counter = 1 351 | for y in range(data.shape[0]): 352 | for x in range(data.shape[1]): 353 | if not mask[y, x]: # 如果格子未被遮罩 354 | plt.text( 355 | x + 0.5, 356 | y + 0.5, 357 | str(counter), 358 | ha="center", 359 | va="center", 360 | color="black", 361 | ) 362 | counter += 1 363 | plt.yticks([]) 364 | plt.tight_layout() 365 | filepath = ( 366 | "./用户数据/data/src/热力图/" 367 | + title 368 | + "的聊天热力图 " 369 | + str(year) 370 | + str(time) 371 | + "月 版!!.png" 372 | ) 373 | plt.savefig(filepath, format="png") 374 | 375 | def draw_heatmap_big(self, rili_dfs, title, masks, length, months, max_count): 376 | """ 377 | 画热力图总览 378 | :param rili_dfs: 日历 379 | :param title: 标题 380 | :param masks: 遮罩 381 | :param length: 数量 382 | :param months: 月份 383 | :param max_count: 最大计数 384 | """ 385 | rows = ceil(length / 4) 386 | cols = 4 387 | fig, axes = plt.subplots(rows, cols, figsize=(10, 5 * rows)) 388 | 389 | if rows == 1 or cols == 1: 390 | axes = axes.reshape(rows, cols) 391 | 392 | for i in range(rows * cols): 393 | row = i // cols 394 | col = i % cols 395 | ax = axes[row, col] 396 | 397 | # 如果 i 小于 length,则绘制子图,否则隐藏该子图 398 | if i < length: 399 | data = rili_dfs[i] 400 | data = data.fillna(0) 401 | data = data.astype(int) 402 | heatmap( 403 | data=data, 404 | mask=masks[i], 405 | vmax=max_count, 406 | vmin=0, 407 | cmap="YlOrRd", 408 | linewidths=0.5, 409 | linecolor="white", 410 | ax=ax, 411 | cbar=False, 412 | ) 413 | counter = 1 414 | for y in range(data.shape[0]): 415 | for x in range(data.shape[1]): 416 | if not masks[i][y, x]: # 如果格子未被遮罩 417 | ax.text( 418 | x + 0.5, 419 | y + 0.5, 420 | str(counter), 421 | ha="center", 422 | va="center", 423 | color="black", 424 | ) 425 | counter += 1 426 | ax.set_title(f"{months[i][0]} 年 {months[i][1]} 月") 427 | ax.set_yticklabels([]) 428 | ax.set_aspect("equal") 429 | else: 430 | ax.axis("off") 431 | 432 | fig.text(0.5, 0.01, title + " 的聊天热力图总览!!", ha="center", fontsize=15) 433 | cbar_ax = fig.add_axes([0.2, 0.90, 0.6, 0.07]) 434 | norm = plt.Normalize(vmin=0, vmax=max_count) 435 | sm = plt.cm.ScalarMappable(cmap="YlOrRd", norm=norm) 436 | fig.colorbar(sm, cax=cbar_ax, orientation="horizontal", label="信息条数") 437 | plt.subplots_adjust(bottom=0.1) 438 | filepath = "./用户数据/data/src/热力图/" + title + "聊天热力图总览.png" 439 | plt.savefig(filepath, format="png") 440 | 441 | def draw_heat_how(self, df, title, max_count): 442 | """ 443 | 热度变化趋势 444 | :param df: 数据 445 | :param title: 标题 446 | :return: 无 447 | """ 448 | plt.figure(figsize=(15, 6)) 449 | plt.ylim(0, max_count) 450 | plt.plot(df.index, df["counts"], marker="o") 451 | 452 | plt.title(title + "聊天热度变化趋势") 453 | plt.xlabel("时间") 454 | plt.ylabel("热度") 455 | 456 | # plt.grid(True) 457 | filepath = "./用户数据/data/src/热力图/" + title + "聊天热度变化趋势.png" 458 | plt.savefig(filepath, format="png") 459 | 460 | def draw_time_heat(self, time_df, title, max_count): 461 | """ 462 | 画时间热力图 463 | :param time_df: 图数据 464 | :param title: 标题 465 | :return: 无 466 | """ 467 | plt.figure(figsize=(10, 3)) 468 | plt.title(title + "的聊天时间分布热力图!!") 469 | data = time_df 470 | data = data.fillna(0) 471 | data = data.astype(int) 472 | heatmap( 473 | data=data, 474 | vmax=max_count, 475 | vmin=0, 476 | cmap="YlOrRd", 477 | linewidths=0.5, 478 | linecolor="white", 479 | cbar=True, 480 | cbar_kws={"label": "信息条数", "orientation": "horizontal"}, 481 | ) 482 | # counter = 0 # 初始化计数器 483 | # for y in range(data.shape[0]): 484 | # for x in range(data.shape[1]): 485 | # plt.text( 486 | # x + 0.5, 487 | # y + 0.5, 488 | # str(counter), 489 | # ha="center", 490 | # va="center", 491 | # color="black", 492 | # ) 493 | # counter += 1 494 | plt.yticks([]) 495 | plt.tight_layout() 496 | filepath = "./用户数据/data/src/time/" + title + "的聊天时间分布热力图!!.png" 497 | plt.savefig(filepath, format="png") 498 | 499 | # 情绪分析 500 | 501 | def draw_emo(self, df, mode): 502 | """ 503 | 绘制饼图 504 | :param df: 数据 505 | :param mode: 模式 506 | :return: 无 507 | """ 508 | df["percentage"] = df["counts"] / df["counts"].sum() * 100 509 | colors = {0: "#CFE3C8", 1: "#FFD686", 2: "#E59069"} 510 | fig, ax = plt.subplots(figsize=(10, 10)) 511 | wedges, texts, autotexts = ax.pie( 512 | df["percentage"], 513 | startangle=140, 514 | autopct="%1.1f%%", 515 | colors=[colors[rank] for rank in df["rank"]], 516 | ) 517 | 518 | legend_labels = ["负面", "中立", "正面"] 519 | ax.legend( 520 | wedges, 521 | legend_labels, 522 | title="情绪的颜色对应", 523 | loc="center left", 524 | bbox_to_anchor=(0, 0), 525 | ) 526 | 527 | plt.title(mode + "情绪占比") 528 | plt.axis("equal") 529 | filepath = "./用户数据/data/src/emo/" + mode + "的情绪分析图!!.png" 530 | plt.savefig(filepath, format="png") 531 | -------------------------------------------------------------------------------- /getMsg.py: -------------------------------------------------------------------------------- 1 | # 最后编辑: 2 | # 晋晨曦 2024.1.20 20:28 3 | # qq:2950171570 4 | # email:Jin0714@outlook.com 回复随缘 5 | from pandas import read_csv 6 | 7 | 8 | def read_msg(path): 9 | """ 10 | 读取数据 11 | :param path: 数据位置 12 | :return: df 13 | """ 14 | lemon = read_csv(path) 15 | return lemon 16 | -------------------------------------------------------------------------------- /icon/icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/icon/icon.ico -------------------------------------------------------------------------------- /icon/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/icon/icon.png -------------------------------------------------------------------------------- /logo/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/logo/logo.png -------------------------------------------------------------------------------- /logo/logo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/logo/logo2.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # 最后编辑: 2 | # 晋晨曦 2024.2.2 17.13 3 | # qq:2950171570 4 | # email:Jin0714@outlook.com 回复随缘 5 | from matplotlib import rcParams 6 | from os import makedirs 7 | from show_gui import ShowGui 8 | from console import no_con 9 | 10 | 11 | def main(): 12 | """ 13 | 主函数 14 | :return: 无 15 | """ 16 | 17 | # 初始化程序 18 | makedirs("用户数据/api", exist_ok=True) 19 | makedirs("用户数据/data", exist_ok=True) 20 | makedirs("用户数据/data/bqb", exist_ok=True) 21 | makedirs("用户数据/data/emoji", exist_ok=True) 22 | makedirs("用户数据/data/src", exist_ok=True) 23 | makedirs("用户数据/data/word", exist_ok=True) 24 | makedirs("用户数据/data/src/emo", exist_ok=True) 25 | makedirs("用户数据/data/src/emoji", exist_ok=True) 26 | makedirs("用户数据/data/src/time", exist_ok=True) 27 | makedirs("用户数据/data/src/word", exist_ok=True) 28 | makedirs("用户数据/data/src/表情包", exist_ok=True) 29 | makedirs("用户数据/data/src/热力图", exist_ok=True) 30 | 31 | rcParams["font.family"] = str("SimHei") 32 | 33 | sh = ShowGui() 34 | 35 | no_con() 36 | 37 | sh.show() 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /save.py: -------------------------------------------------------------------------------- 1 | # 最后编辑: 2 | # 晋晨曦 2024.1.20 20:28 3 | # qq:2950171570 4 | # email:Jin0714@outlook.com 回复随缘 5 | class save_data: 6 | def __init__(self): 7 | """ 8 | 构造函数,初始化一些可能需要的属性。 9 | """ 10 | pass 11 | 12 | def __str__(self): 13 | """ 14 | 字符串表示,用于打印对象时提供有用的信息。 15 | """ 16 | return "draw类实例,用于保存数据" 17 | pass 18 | 19 | def save_data_all(self, data, path): 20 | """ 21 | 保存所有数据 22 | :param data: 数据 23 | :param path: 路径 24 | :return: 25 | """ 26 | for d, p in zip(data, path): 27 | d.to_excel(p, index=False) 28 | pass 29 | -------------------------------------------------------------------------------- /show_gui.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import filedialog 3 | from tkinter import messagebox 4 | from tkinter import PhotoImage 5 | from ctypes import windll 6 | from functools import partial 7 | import getMsg as r 8 | import solve 9 | import data_process as dp 10 | from webbrowser import open_new_tab 11 | 12 | 13 | class ShowGui: 14 | def __init__(self): 15 | try: 16 | windll.shcore.SetProcessDpiAwareness(1) 17 | except (AttributeError, ValueError): 18 | pass 19 | self.root = tk.Tk() 20 | self.root.title("橙子作品之聊天记录分析") 21 | self.root.geometry("1600x1200") 22 | self.root.iconbitmap('./icon/icon.ico') 23 | self.shape1 = "fas fa-dog" 24 | self.shape2 = "far fa-lemon" 25 | self.shape3 = "fas fa-paw" 26 | self.init_pages_start() 27 | pass 28 | 29 | def show(self): 30 | self.page_start.pack(fill="both", expand=True) 31 | self.root.mainloop() 32 | 33 | def init_pages_start(self): 34 | self.page_start = tk.Frame(self.root) 35 | self.center_frame_start = tk.Frame(self.page_start) 36 | self.center_frame_start.place(relx=0.5, rely=0.3, anchor=tk.CENTER) 37 | 38 | self.logo = PhotoImage(file="./logo/logo.png") 39 | self.logo2 = PhotoImage(file="./logo/logo2.png") 40 | tk.Label(self.center_frame_start, image=self.logo).pack() 41 | tk.Label(self.root, image=self.logo2).pack(side="top", anchor="nw") 42 | 43 | tk.Button( 44 | self.center_frame_start, 45 | text="启动程序", 46 | command=self.show_page_choice_path, 47 | font=("SimHei", 16), 48 | ).pack() 49 | 50 | def init_pages_choice_path(self): 51 | self.page_choice_path = tk.Frame(self.root) 52 | self.center_frame_choice_path = tk.Frame(self.page_choice_path) 53 | self.center_frame_choice_path.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 54 | tk.Button( 55 | self.center_frame_choice_path, 56 | text="选择聊天记录", 57 | command=self.load_date, 58 | font=("SimHei", 16), 59 | ).pack() 60 | 61 | def init_pages_load_name(self): 62 | self.page_load_name = tk.Frame(self.root) 63 | self.center_frame_load_name = tk.Frame(self.page_load_name) 64 | self.center_frame_load_name.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 65 | tk.Label( 66 | self.center_frame_load_name, 67 | text="请输入两个分析对象的名字", 68 | font=("SimHei", 16), 69 | ).pack(pady=(0, 20)) 70 | tk.Label( 71 | self.center_frame_load_name, 72 | text="分析对象1的名字(你的名字,如:橙子先生):", 73 | font=("SimHei", 16), 74 | ).pack() 75 | name_entry_one = tk.Entry(self.center_frame_load_name) 76 | name_entry_one.pack(pady=5) 77 | tk.Label( 78 | self.center_frame_load_name, 79 | text="分析对象2的名字(对方的名字,如:柠檬女士):", 80 | font=("SimHei", 16), 81 | ).pack() 82 | name_entry_two = tk.Entry(self.center_frame_load_name) 83 | name_entry_two.pack(pady=5) 84 | tk.Button( 85 | self.center_frame_load_name, 86 | text="确认", 87 | command=partial(self.save_name, name_entry_one, name_entry_two), 88 | font=("SimHei", 16), 89 | ).pack(pady=10) 90 | 91 | def initpages_process_data(self): 92 | self.page_process_data = tk.Frame(self.root) 93 | self.center_frame_process_data = tk.Frame(self.page_process_data) 94 | self.center_frame_process_data.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 95 | tk.Label( 96 | self.center_frame_process_data, 97 | text=f"将分析从 {self.s_date} 到 {self.e_date} 的聊天记录", 98 | font=("SimHei", 16), 99 | ).pack(pady=(0, 20)) 100 | tk.Button( 101 | self.center_frame_process_data, 102 | text="开始分析", 103 | command=self.Go, 104 | font=("SimHei", 16), 105 | ).pack() 106 | 107 | def initpages_process_heat(self): 108 | self.s.get_max_count_date() 109 | self.page_process_heat = tk.Frame(self.root) 110 | self.center_frame_process_heat = tk.Frame(self.page_process_heat) 111 | self.center_frame_process_heat.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 112 | tk.Label( 113 | self.center_frame_process_heat, 114 | text="聊天热度分析\n现在开始分析聊天热度\n请选择分析谁的聊天记录?", 115 | font=("SimHei", 16), 116 | ).pack(pady=(0, 20)) 117 | tk.Label( 118 | self.center_frame_process_heat, 119 | text="-----------------------", 120 | font=("SimHei", 16), 121 | ).pack(pady=(0, 10)) 122 | tk.Button( 123 | self.center_frame_process_heat, 124 | text=self.name1, 125 | command=partial(self.process_heat, 1), 126 | font=("SimHei", 16), 127 | ).pack(pady=(0, 10)) 128 | tk.Label( 129 | self.center_frame_process_heat, 130 | text="-----------------------", 131 | font=("SimHei", 16), 132 | ).pack(pady=(0, 10)) 133 | tk.Button( 134 | self.center_frame_process_heat, 135 | text=self.name2, 136 | command=partial(self.process_heat, 2), 137 | font=("SimHei", 16), 138 | ).pack(pady=(0, 10)) 139 | tk.Label( 140 | self.center_frame_process_heat, 141 | text="-----------------------", 142 | font=("SimHei", 16), 143 | ).pack(pady=(0, 10)) 144 | tk.Button( 145 | self.center_frame_process_heat, 146 | text=self.name1 + "和" + self.name2, 147 | command=partial(self.process_heat, 3), 148 | font=("SimHei", 16), 149 | ).pack(pady=(0, 10)) 150 | tk.Label( 151 | self.center_frame_process_heat, 152 | text="-----------------------", 153 | font=("SimHei", 16), 154 | ).pack(pady=(0, 10)) 155 | tk.Button( 156 | self.center_frame_process_heat, 157 | text="我全都要!(推荐)", 158 | command=partial(self.process_heat, 4), 159 | font=("SimHei", 16), 160 | ).pack() 161 | tk.Label( 162 | self.center_frame_process_heat, 163 | text="-----------------------", 164 | font=("SimHei", 16), 165 | ).pack(pady=(0, 10)) 166 | tk.Button( 167 | self.center_frame_process_heat, 168 | text="跳过", 169 | command=partial(self.process_heat, 5), 170 | font=("SimHei", 16), 171 | ).pack() 172 | 173 | def initpages_process_time(self): 174 | self.s.get_max_count_time() 175 | self.page_process_time = tk.Frame(self.root) 176 | self.center_frame_process_time = tk.Frame(self.page_process_time) 177 | self.center_frame_process_time.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 178 | tk.Label( 179 | self.center_frame_process_time, 180 | text="聊天时间分析\n现在开始分析聊天时间分布\n请选择分析谁的聊天记录?", 181 | font=("SimHei", 16), 182 | ).pack(pady=(0, 20)) 183 | tk.Label( 184 | self.center_frame_process_time, 185 | text="-----------------------", 186 | font=("SimHei", 16), 187 | ).pack(pady=(0, 10)) 188 | tk.Button( 189 | self.center_frame_process_time, 190 | text=self.name1, 191 | command=partial(self.process_time, 1), 192 | font=("SimHei", 16), 193 | ).pack(pady=(0, 10)) 194 | tk.Label( 195 | self.center_frame_process_time, 196 | text="-----------------------", 197 | font=("SimHei", 16), 198 | ).pack(pady=(0, 10)) 199 | tk.Button( 200 | self.center_frame_process_time, 201 | text=self.name2, 202 | command=partial(self.process_time, 2), 203 | font=("SimHei", 16), 204 | ).pack(pady=(0, 10)) 205 | tk.Label( 206 | self.center_frame_process_time, 207 | text="-----------------------", 208 | font=("SimHei", 16), 209 | ).pack(pady=(0, 10)) 210 | tk.Button( 211 | self.center_frame_process_time, 212 | text=self.name1 + "和" + self.name2, 213 | command=partial(self.process_time, 3), 214 | font=("SimHei", 16), 215 | ).pack(pady=(0, 10)) 216 | tk.Label( 217 | self.center_frame_process_time, 218 | text="-----------------------", 219 | font=("SimHei", 16), 220 | ).pack(pady=(0, 10)) 221 | tk.Button( 222 | self.center_frame_process_time, 223 | text="我全都要!(推荐)", 224 | command=partial(self.process_time, 4), 225 | font=("SimHei", 16), 226 | ).pack() 227 | tk.Label( 228 | self.center_frame_process_time, 229 | text="-----------------------", 230 | font=("SimHei", 16), 231 | ).pack(pady=(0, 10)) 232 | tk.Button( 233 | self.center_frame_process_time, 234 | text="跳过", 235 | command=partial(self.process_time, 5), 236 | font=("SimHei", 16), 237 | ).pack() 238 | 239 | def initpages_process_biaoqingbao(self): 240 | self.page_process_bqb = tk.Frame(self.root) 241 | self.center_frame_process_bqb = tk.Frame(self.page_process_bqb) 242 | self.center_frame_process_bqb.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 243 | tk.Label( 244 | self.center_frame_process_bqb, 245 | text="现在开始分析表情包", 246 | font=("SimHei", 16), 247 | ).pack(pady=(0, 10)) 248 | tk.Button( 249 | self.center_frame_process_bqb, 250 | text="开始分析表情包", 251 | command=partial(self.process_bqb, 1), 252 | font=("SimHei", 16), 253 | ).pack() 254 | tk.Label( 255 | self.center_frame_process_bqb, 256 | text="-----------------------", 257 | font=("SimHei", 16), 258 | ).pack(pady=(0, 10)) 259 | tk.Button( 260 | self.center_frame_process_bqb, 261 | text="跳过", 262 | command=partial(self.process_bqb, 0), 263 | font=("SimHei", 16), 264 | ).pack() 265 | 266 | def initpages_process_emoji(self): 267 | self.page_process_emoji = tk.Frame(self.root) 268 | self.center_frame_process_emoji = tk.Frame(self.page_process_emoji) 269 | self.center_frame_process_emoji.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 270 | tk.Label( 271 | self.center_frame_process_emoji, 272 | text="现在开始分析emoji", 273 | font=("SimHei", 16), 274 | ).pack(pady=(0, 10)) 275 | tk.Button( 276 | self.center_frame_process_emoji, 277 | text="开始分析emoji", 278 | command=partial(self.process_emoji, 1), 279 | font=("SimHei", 16), 280 | ).pack() 281 | tk.Label( 282 | self.center_frame_process_emoji, 283 | text="-----------------------", 284 | font=("SimHei", 16), 285 | ).pack(pady=(0, 10)) 286 | tk.Button( 287 | self.center_frame_process_emoji, 288 | text="跳过", 289 | command=partial(self.process_emoji, 0), 290 | font=("SimHei", 16), 291 | ).pack() 292 | 293 | def initpages_process_word_all(self): 294 | self.page_process_word_all = tk.Frame(self.root) 295 | self.center_frame_process_word_all = tk.Frame(self.page_process_word_all) 296 | self.center_frame_process_word_all.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 297 | tk.Label( 298 | self.center_frame_process_word_all, 299 | text="现在开始分析词频", 300 | font=("SimHei", 16), 301 | ).pack(pady=(0, 10)) 302 | tk.Button( 303 | self.center_frame_process_word_all, 304 | text="自定义词云形状", 305 | command=partial(self.process_word_all, 1), 306 | font=("SimHei", 16), 307 | ).pack() 308 | tk.Label( 309 | self.center_frame_process_word_all, 310 | text="-----------------------", 311 | font=("SimHei", 16), 312 | ).pack(pady=(0, 10)) 313 | tk.Button( 314 | self.center_frame_process_word_all, 315 | text="使用默认词云形状", 316 | command=partial(self.process_word_all, 2), 317 | font=("SimHei", 16), 318 | ).pack() 319 | tk.Label( 320 | self.center_frame_process_word_all, 321 | text="-----------------------", 322 | font=("SimHei", 16), 323 | ).pack(pady=(0, 10)) 324 | tk.Button( 325 | self.center_frame_process_word_all, 326 | text="跳过", 327 | command=partial(self.process_word_all, 0), 328 | font=("SimHei", 16), 329 | ).pack() 330 | 331 | def initpages_process_word_1(self): 332 | self.page_process_word_1 = tk.Frame(self.root) 333 | self.center_frame_process_word_1 = tk.Frame(self.page_process_word_1) 334 | self.center_frame_process_word_1.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 335 | tk.Label( 336 | self.center_frame_process_word_1, 337 | text="进入下面的网站,点击“查看图标”,找到喜欢的形状,点击右上角的复制,然后粘贴到下面的输入栏即可\n如果还是不明白,就去查看使用教程中的词云形状自定义教程", 338 | font=("SimHei", 16), 339 | ).pack(pady=(0, 10)) 340 | link_label = tk.Label( 341 | self.center_frame_process_word_1, 342 | text="点击打开网站,选取词云形状", 343 | fg="blue", 344 | cursor="hand2", 345 | font=("SimHei", 16), 346 | ) 347 | link_label.pack() 348 | link_label.bind( 349 | "", lambda e: self.open_link("https://fa5.dashgame.com/#/") 350 | ) 351 | tk.Label( 352 | self.center_frame_process_word_1, 353 | text="请输入三个形状编码", 354 | font=("SimHei", 16), 355 | ).pack(pady=(0, 10)) 356 | tk.Label( 357 | self.center_frame_process_word_1, 358 | text=f"{self.name1}的词云形状:", 359 | font=("SimHei", 16), 360 | ).pack() 361 | shape_entry1 = tk.Entry(self.center_frame_process_word_1) 362 | shape_entry1.pack(pady=5) 363 | tk.Label( 364 | self.center_frame_process_word_1, 365 | text=f"{self.name2}的词云形状:", 366 | font=("SimHei", 16), 367 | ).pack() 368 | shape_entry2 = tk.Entry(self.center_frame_process_word_1) 369 | shape_entry2.pack(pady=5) 370 | tk.Label( 371 | self.center_frame_process_word_1, 372 | text="全部聊天记录的词云形状:", 373 | font=("SimHei", 16), 374 | ).pack() 375 | shape_entry3 = tk.Entry(self.center_frame_process_word_1) 376 | shape_entry3.pack(pady=5) 377 | tk.Button( 378 | self.center_frame_process_word_1, 379 | text="确认", 380 | command=partial(self.save_shape, shape_entry1, shape_entry2, shape_entry3), 381 | font=("SimHei", 16), 382 | ).pack(pady=10) 383 | tk.Label( 384 | self.center_frame_process_word_1, 385 | text="请注意一定要输入正确的词云代码,例如:far fa-lemon,如果输入错误会导致程序出现未知错误", 386 | font=("SimHei", 16), 387 | ).pack() 388 | 389 | def initpages_process_word_2(self): 390 | self.s.change_shape(self.shape1, self.shape2, self.shape3) 391 | self.page_process_word_2 = tk.Frame(self.root) 392 | self.center_frame_process_word_2 = tk.Frame(self.page_process_word_2) 393 | self.center_frame_process_word_2.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 394 | tk.Label( 395 | self.center_frame_process_word_2, 396 | text="词频分析\n现在开始分析词频并生成词云\n请选择分析谁的聊天记录?", 397 | font=("SimHei", 16), 398 | ).pack(pady=(0, 20)) 399 | tk.Label( 400 | self.center_frame_process_word_2, 401 | text="-----------------------", 402 | font=("SimHei", 16), 403 | ).pack(pady=(0, 10)) 404 | tk.Button( 405 | self.center_frame_process_word_2, 406 | text=self.name1, 407 | command=partial(self.process_word, 1), 408 | font=("SimHei", 16), 409 | ).pack(pady=(0, 10)) 410 | tk.Label( 411 | self.center_frame_process_word_2, 412 | text="-----------------------", 413 | font=("SimHei", 16), 414 | ).pack(pady=(0, 10)) 415 | tk.Button( 416 | self.center_frame_process_word_2, 417 | text=self.name2, 418 | command=partial(self.process_word, 2), 419 | font=("SimHei", 16), 420 | ).pack(pady=(0, 10)) 421 | tk.Label( 422 | self.center_frame_process_word_2, 423 | text="-----------------------", 424 | font=("SimHei", 16), 425 | ).pack(pady=(0, 10)) 426 | tk.Button( 427 | self.center_frame_process_word_2, 428 | text=self.name1 + "和" + self.name2, 429 | command=partial(self.process_word, 3), 430 | font=("SimHei", 16), 431 | ).pack(pady=(0, 10)) 432 | tk.Label( 433 | self.center_frame_process_word_2, 434 | text="-----------------------", 435 | font=("SimHei", 16), 436 | ).pack(pady=(0, 10)) 437 | tk.Button( 438 | self.center_frame_process_word_2, 439 | text="我全都要!(推荐)", 440 | command=partial(self.process_word, 4), 441 | font=("SimHei", 16), 442 | ).pack() 443 | 444 | def initpages_process_emo_all(self): 445 | self.page_process_emo_all = tk.Frame(self.root) 446 | self.center_frame_process_emo_all = tk.Frame(self.page_process_emo_all) 447 | self.center_frame_process_emo_all.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 448 | tk.Label( 449 | self.center_frame_process_emo_all, 450 | text="你是否想分析每一句聊天记录的情感倾向来判断谁在聊天中带来了更多的正能量,谁又经常在聊天中诉苦?!\n请注意,此部分需要您自行获得百度智能云中自然语言分析中的情感倾向分析API,这部分请自行百度。\n新用户有50万次免费使用,但是QPS只有2,也就是一秒只能分析两句话,速度会很慢。\n请注意本部分需要联网", 451 | font=("SimHei", 16), 452 | ).pack(pady=(0, 10)) 453 | tk.Button( 454 | self.center_frame_process_emo_all, 455 | text="想!", 456 | command=partial(self.process_emo, 1), 457 | font=("SimHei", 16), 458 | ).pack() 459 | tk.Label( 460 | self.center_frame_process_emo_all, 461 | text="-----------------------", 462 | font=("SimHei", 16), 463 | ).pack(pady=(0, 10)) 464 | tk.Button( 465 | self.center_frame_process_emo_all, 466 | text="不想", 467 | command=partial(self.process_emo, 0), 468 | font=("SimHei", 16), 469 | ).pack() 470 | tk.Label( 471 | self.center_frame_process_emo_all, 472 | text="请注意:如果你要进行情感分析,请一定确保自己的api次数足够,或者自己的账户余额足够,否则当次数用完会前功尽弃", 473 | font=("SimHei", 16), 474 | ).pack(pady=(0, 10)) 475 | 476 | def initpages_process_emo_1(self): 477 | self.page_process_emo_1 = tk.Frame(self.root) 478 | self.center_frame_process_emo_1 = tk.Frame(self.page_process_emo_1) 479 | self.center_frame_process_emo_1.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 480 | tk.Label( 481 | self.center_frame_process_emo_1, 482 | text="请输入您API的QPS", 483 | font=("SimHei", 16), 484 | ).pack(pady=(0, 10)) 485 | qps_entry = tk.Entry(self.center_frame_process_emo_1) 486 | qps_entry.pack(pady=5) 487 | tk.Label( 488 | self.center_frame_process_emo_1, 489 | text="请输入App_ID", 490 | font=("SimHei", 16), 491 | ).pack(pady=(0, 10)) 492 | api_entry1 = tk.Entry(self.center_frame_process_emo_1) 493 | api_entry1.pack(pady=5) 494 | tk.Label( 495 | self.center_frame_process_emo_1, 496 | text="请输入API_KEY", 497 | font=("SimHei", 16), 498 | ).pack() 499 | api_entry2 = tk.Entry(self.center_frame_process_emo_1) 500 | api_entry2.pack(pady=5) 501 | tk.Label( 502 | self.center_frame_process_emo_1, 503 | text="请输入SECRET_KEY", 504 | font=("SimHei", 16), 505 | ).pack() 506 | api_entry3 = tk.Entry(self.center_frame_process_emo_1) 507 | api_entry3.pack(pady=5) 508 | tk.Button( 509 | self.center_frame_process_emo_1, 510 | text="确定", 511 | command=partial( 512 | self.save_api, qps_entry, api_entry1, api_entry2, api_entry3 513 | ), 514 | font=("SimHei", 16), 515 | ).pack(pady=10) 516 | tk.Label( 517 | self.center_frame_process_emo_1, 518 | text="请注意一定要输入正确的api,如果输入错误会导致程序出现未知错误", 519 | font=("SimHei", 16), 520 | ).pack() 521 | 522 | def initpages_process_emo_2(self): 523 | self.page_process_emo_2 = tk.Frame(self.root) 524 | self.center_frame_process_emo_2 = tk.Frame(self.page_process_emo_2) 525 | self.center_frame_process_emo_2.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 526 | tk.Label( 527 | self.center_frame_process_emo_2, 528 | text="情感分析\n现在开始分析情感\n请选择分析谁的聊天记录?", 529 | font=("SimHei", 16), 530 | ).pack(pady=(0, 20)) 531 | tk.Label( 532 | self.center_frame_process_emo_2, 533 | text="-----------------------", 534 | font=("SimHei", 16), 535 | ).pack(pady=(0, 10)) 536 | tk.Button( 537 | self.center_frame_process_emo_2, 538 | text=self.name1, 539 | command=partial(self.process_emo_do, 1), 540 | font=("SimHei", 16), 541 | ).pack(pady=(0, 10)) 542 | tk.Label( 543 | self.center_frame_process_emo_2, 544 | text="-----------------------", 545 | font=("SimHei", 16), 546 | ).pack(pady=(0, 10)) 547 | tk.Button( 548 | self.center_frame_process_emo_2, 549 | text=self.name2, 550 | command=partial(self.process_emo_do, 2), 551 | font=("SimHei", 16), 552 | ).pack(pady=(0, 10)) 553 | tk.Label( 554 | self.center_frame_process_emo_2, 555 | text="-----------------------", 556 | font=("SimHei", 16), 557 | ).pack(pady=(0, 10)) 558 | tk.Button( 559 | self.center_frame_process_emo_2, 560 | text=self.name1 + "和" + self.name2, 561 | command=partial(self.process_emo_do, 3), 562 | font=("SimHei", 16), 563 | ).pack(pady=(0, 10)) 564 | tk.Label( 565 | self.center_frame_process_emo_2, 566 | text="-----------------------", 567 | font=("SimHei", 16), 568 | ).pack(pady=(0, 10)) 569 | tk.Button( 570 | self.center_frame_process_emo_2, 571 | text="我全都要!(推荐)", 572 | command=partial(self.process_emo_do, 4), 573 | font=("SimHei", 16), 574 | ).pack() 575 | 576 | def initpages_process_end(self): 577 | self.page_process_end = tk.Frame(self.root) 578 | self.center_frame_process_end = tk.Frame(self.page_process_end) 579 | self.center_frame_process_end.place(relx=0.5, rely=0.4, anchor=tk.CENTER) 580 | tk.Label( 581 | self.center_frame_process_end, 582 | text="恭喜!分析完成!可以退出啦!\n本软件由橙子先生一人独立开发,免费分享给大家使用!\n如果大家想的话,可以给橙子先生或者柠檬女士点个关注,这会让我们有成就感\n并且我预计会在未来不定期分享一些自己写的好玩的程序,谢谢大家。", 583 | font=("SimHei", 16), 584 | ).pack() 585 | link_label_1 = tk.Label( 586 | self.center_frame_process_end, 587 | text="点击打开橙子先生小红书主页", 588 | fg="blue", 589 | cursor="hand2", 590 | font=("SimHei", 16), 591 | ) 592 | link_label_1.pack() 593 | link_label_1.bind( 594 | "", 595 | lambda e: self.open_link( 596 | "https://www.xiaohongshu.com/user/profile/60f2fd48000000000100aaff" 597 | ), 598 | ) 599 | link_label_2 = tk.Label( 600 | self.center_frame_process_end, 601 | text="点击打开橙子先生bilibili主页", 602 | fg="blue", 603 | cursor="hand2", 604 | font=("SimHei", 16), 605 | ) 606 | link_label_2.pack() 607 | link_label_2.bind( 608 | "", 609 | lambda e: self.open_link( 610 | "https://space.bilibili.com/316695110?spm_id_from=333.999.0.0" 611 | ), 612 | ) 613 | link_label_3 = tk.Label( 614 | self.center_frame_process_end, 615 | text="点击打开柠檬女士小红书主页", 616 | fg="blue", 617 | cursor="hand2", 618 | font=("SimHei", 16), 619 | ) 620 | link_label_3.pack() 621 | link_label_3.bind( 622 | "", 623 | lambda e: self.open_link( 624 | "https://www.xiaohongshu.com/user/profile/5a79c3c64eacab6e6f5400c1?channelType=web_engagement_notification_page&channelTabId=mentions" 625 | ), 626 | ) 627 | tk.Label( 628 | self.center_frame_process_end, 629 | text="PS:结果都保存在 用户数据/data 文件夹中,无论是导出的表格还是图片都在里面\n图片在 用户数据/data/src 文件夹里\n祝大家生活愉快!", 630 | font=("SimHei", 14), 631 | ).pack() 632 | 633 | def show_page_choice_path(self): 634 | self.init_pages_choice_path() 635 | self.page_start.pack_forget() 636 | self.page_choice_path.pack(fill="both", expand=True) 637 | 638 | def show_page_load_name(self): 639 | self.init_pages_load_name() 640 | self.page_choice_path.pack_forget() 641 | self.page_load_name.pack(fill="both", expand=True) 642 | 643 | def show_page_process_data(self): 644 | self.initpages_process_data() 645 | self.page_load_name.pack_forget() 646 | self.page_process_data.pack(fill="both", expand=True) 647 | 648 | def show_page_process_heat(self): 649 | self.initpages_process_heat() 650 | self.page_process_data.pack_forget() 651 | self.page_process_heat.pack(fill="both", expand=True) 652 | 653 | def show_page_process_time(self): 654 | self.initpages_process_time() 655 | self.page_process_heat.pack_forget() 656 | self.page_process_time.pack(fill="both", expand=True) 657 | 658 | def show_page_process_bqb(self): 659 | self.initpages_process_biaoqingbao() 660 | self.page_process_time.pack_forget() 661 | self.page_process_bqb.pack(fill="both", expand=True) 662 | 663 | def show_page_process_emoji(self): 664 | self.initpages_process_emoji() 665 | self.page_process_bqb.pack_forget() 666 | self.page_process_emoji.pack(fill="both", expand=True) 667 | 668 | def show_page_process_word_all(self): 669 | self.initpages_process_word_all() 670 | self.page_process_emoji.pack_forget() 671 | self.page_process_word_all.pack(fill="both", expand=True) 672 | 673 | def show_page_process_word_1(self): 674 | self.initpages_process_word_1() 675 | self.page_process_word_all.pack_forget() 676 | self.page_process_word_1.pack(fill="both", expand=True) 677 | 678 | def show_page_process_word_2_from1(self): 679 | self.initpages_process_word_2() 680 | self.page_process_word_1.pack_forget() 681 | self.page_process_word_2.pack(fill="both", expand=True) 682 | 683 | def show_page_process_word_2_fromall(self): 684 | self.initpages_process_word_2() 685 | self.page_process_word_all.pack_forget() 686 | self.page_process_word_2.pack(fill="both", expand=True) 687 | 688 | def show_page_process_emo_all(self): 689 | self.initpages_process_emo_all() 690 | self.page_process_word_2.pack_forget() 691 | self.page_process_emo_all.pack(fill="both", expand=True) 692 | 693 | def show_page_process_emo_all_from_all(self): 694 | self.initpages_process_emo_all() 695 | self.page_process_word_all.pack_forget() 696 | self.page_process_emo_all.pack(fill="both", expand=True) 697 | 698 | def show_page_process_emo_1(self): 699 | self.initpages_process_emo_1() 700 | self.page_process_emo_all.pack_forget() 701 | self.page_process_emo_1.pack(fill="both", expand=True) 702 | 703 | def show_page_process_emo_2(self): 704 | self.initpages_process_emo_2() 705 | self.page_process_emo_1.pack_forget() 706 | self.page_process_emo_2.pack(fill="both", expand=True) 707 | 708 | def show_page_end_emo_all(self): 709 | self.initpages_process_end() 710 | self.page_process_emo_all.pack_forget() 711 | self.page_process_end.pack(fill="both", expand=True) 712 | 713 | def show_page_end_emo_2(self): 714 | self.initpages_process_end() 715 | self.page_process_emo_2.pack_forget() 716 | self.page_process_end.pack(fill="both", expand=True) 717 | 718 | def open_link(self, url): 719 | open_new_tab(url) 720 | 721 | def save_name(self, name_entry1, name_entry2): 722 | self.name1 = name_entry1.get() 723 | self.name2 = name_entry2.get() 724 | self.process_data() 725 | self.show_page_process_data() 726 | 727 | def save_shape(self, shape_entry1, shape_entry2, shape_entry3): 728 | self.shape1 = shape_entry1.get() 729 | self.shape2 = shape_entry2.get() 730 | self.shape3 = shape_entry3.get() 731 | self.show_page_process_word_2_from1() 732 | 733 | def save_api(self, qps_entry, api_entry1, api_entry2, api_entry3): 734 | self.api1 = api_entry1.get() 735 | self.api2 = api_entry2.get() 736 | self.api3 = api_entry3.get() 737 | self.QPS = qps_entry.get() 738 | self.s.get_api(self.QPS, self.api1, self.api2, self.api3) 739 | self.show_page_process_emo_2() 740 | 741 | def load_date(self): 742 | while True: 743 | file_path = filedialog.askopenfilename( 744 | parent=self.page_choice_path, filetypes=[("CSV files", "*.csv")] 745 | ) 746 | if file_path: 747 | if file_path.endswith(".csv"): 748 | self.df = r.read_msg(file_path) 749 | p_l = tk.Label( 750 | self.root, 751 | text=f"聊天记录文件为:{file_path}", 752 | bg="yellow", 753 | font=("SimHei", 12), 754 | width=500, 755 | height=1, 756 | ) 757 | p_l.pack(side=tk.BOTTOM) 758 | break 759 | else: 760 | messagebox.showerror("错误", "请选择一个CSV文件", parent=self.root) # 显示错误消息 761 | else: 762 | break 763 | self.show_page_load_name() 764 | 765 | def process_data(self): 766 | self.j_df, self.n_df, self.all_df = dp.process_data(self.df) 767 | self.s_date = self.all_df.iloc[0]["time"] 768 | self.e_date = self.all_df.iloc[-1]["time"] 769 | 770 | def Go(self): 771 | self.s = solve.solve(self.j_df, self.n_df, self.all_df, self.name1, self.name2) 772 | self.show_page_process_heat() 773 | 774 | def process_heat(self, choice): 775 | if choice == 1: 776 | self.s.process_heat(self.name1) 777 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/热力图 中,请自行查看!") 778 | elif choice == 2: 779 | self.s.process_heat(self.name2) 780 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/热力图 中,请自行查看!") 781 | elif choice == 3: 782 | self.s.process_heat(self.name1 + self.name2) 783 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/热力图 中,请自行查看!") 784 | elif choice == 4: 785 | self.s.process_heat(self.name1) 786 | self.s.process_heat(self.name2) 787 | self.s.process_heat(self.name1 + self.name2) 788 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/热力图 中,请自行查看!") 789 | self.show_page_process_time() 790 | 791 | 792 | def process_time(self, choice): 793 | if choice == 1: 794 | self.s.process_time(self.name1) 795 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/time 中,请自行查看!") 796 | elif choice == 2: 797 | self.s.process_time(self.name2) 798 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/time 中,请自行查看!") 799 | elif choice == 3: 800 | self.s.process_time(self.name1 + self.name2) 801 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/time 中,请自行查看!") 802 | elif choice == 4: 803 | self.s.process_time(self.name1) 804 | self.s.process_time(self.name2) 805 | self.s.process_time(self.name1 + self.name2) 806 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/time 中,请自行查看!") 807 | self.show_page_process_bqb() 808 | 809 | def process_emoji(self, choice): 810 | if choice == 1: 811 | self.s.process_emoji() 812 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/emoji 中,请自行查看!") 813 | self.show_page_process_word_all() 814 | 815 | def process_bqb(self, choice): 816 | if choice == 1: 817 | self.s.process_biaoqingbao() 818 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/表情包 中,请自行查看!") 819 | self.show_page_process_emoji() 820 | 821 | def process_word_all(self, choice): 822 | if choice == 1: 823 | self.show_page_process_word_1() 824 | elif choice == 2: 825 | self.show_page_process_word_2_fromall() 826 | else: 827 | self.show_page_process_emo_all_from_all() 828 | 829 | def process_word(self, choice): 830 | if choice == 1: 831 | self.s.process_words(self.name1) 832 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/word 中,请自行查看!") 833 | elif choice == 2: 834 | self.s.process_words(self.name2) 835 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/word 中,请自行查看!") 836 | elif choice == 3: 837 | self.s.process_words(self.name1 + self.name2) 838 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/word 中,请自行查看!") 839 | elif choice == 4: 840 | self.s.process_words(self.name1) 841 | self.s.process_words(self.name2) 842 | self.s.process_words(self.name1 + self.name2) 843 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/word 中,请自行查看!") 844 | self.show_page_process_emo_all() 845 | 846 | def process_emo(self, choice): 847 | if choice == 1: 848 | self.show_page_process_emo_1() 849 | else: 850 | self.show_page_end_emo_all() 851 | 852 | def process_emo_do(self, choice): 853 | if choice == 1: 854 | self.s.process_emo(self.name1) 855 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/emo 中,请自行查看!") 856 | elif choice == 2: 857 | self.s.process_emo(self.name2) 858 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/emo 中,请自行查看!") 859 | elif choice == 3: 860 | self.s.process_emo(self.name1 + self.name2) 861 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/emo 中,请自行查看!") 862 | elif choice == 4: 863 | self.s.process_emo(self.name1) 864 | self.s.process_emo(self.name2) 865 | self.s.process_emo(self.name1 + self.name2) 866 | messagebox.showinfo("分析结果", "分析完毕,结果保存在 ./用户数据/data/src/emo 中,请自行查看!") 867 | self.show_page_end_emo_2() 868 | -------------------------------------------------------------------------------- /solve.py: -------------------------------------------------------------------------------- 1 | # 最后编辑: 2 | # 晋晨曦 2024.2.2 17.13 3 | # qq:2950171570 4 | # email:Jin0714@outlook.com 回复随缘 5 | from jieba import cut 6 | from re import findall 7 | from re import sub 8 | from re import search 9 | from collections import defaultdict 10 | import pandas as pd 11 | from aip import AipNlp 12 | from time import sleep 13 | 14 | import draw 15 | import save 16 | import calenda as ca 17 | 18 | 19 | class solve: 20 | def __init__(self, j_df, n_df, all_df, name1, name2): 21 | """ 22 | 构造函数 23 | :param j_df: name1聊天记录 24 | :param n_df: name2聊天记录 25 | :param all_df: 全部聊天记录 26 | :param name1: 主分析人 27 | :param name2: 聊天对象 28 | """ 29 | self.emoji_j = defaultdict(int) 30 | self.emoji_l = defaultdict(int) 31 | self.d = draw.draw_data(name1, name2) 32 | self.sa = save.save_data() 33 | self.j_df = j_df 34 | self.n_df = n_df 35 | self.all_df = all_df 36 | self.j_df_clean = pd.DataFrame() 37 | self.n_df_clean = pd.DataFrame() 38 | self.all_df_clean = pd.DataFrame() 39 | self.words = pd.DataFrame() 40 | self.client = AipNlp("None", "None", "None") 41 | self.clean_data() 42 | self.name1 = name1 43 | self.name2 = name2 44 | self.shape1 = "fas fa-dog" 45 | self.shape2 = "fas fa-dog" 46 | self.shape3 = "fas fa-paw" 47 | self.s_year, self.s_month, self.e_year, self.e_month = self.find_time( 48 | self.all_df 49 | ) 50 | self.months = self.find_month() 51 | self.months_size = len(self.months) 52 | pass 53 | 54 | def __str__(self): 55 | """ 56 | 字符串表示,用于打印对象时提供有用的信息。 57 | """ 58 | return "solve类实例,用于分析数据" 59 | pass 60 | 61 | def not_start_with_msg_words(self, value): 62 | """ 63 | 判断是否不以<开头 64 | :param value: 文本 65 | :return: 不以<开头为true,否则flase 66 | """ 67 | return not value.startswith("<") 68 | 69 | def remove_bracketed_text_and_count_words(self, s): 70 | """ 71 | 删除所有聊天记录[]中文字 72 | :param s:语句 73 | :return:删除后的语句 74 | """ 75 | return sub(r"\[.*?\]", "", s) 76 | 77 | def clean_data(self): 78 | """ 79 | 清洗数据 80 | :return:无 81 | """ 82 | self.j_df_clean = self.j_df[ 83 | self.j_df["data"].apply(self.not_start_with_msg_words) 84 | ].copy() 85 | self.j_df_clean.loc[:, "data"] = self.j_df_clean["data"].apply( 86 | self.remove_bracketed_text_and_count_words 87 | ) 88 | self.j_df_clean = self.j_df_clean[self.j_df_clean["data"].apply(len) > 0] 89 | 90 | self.n_df_clean = self.n_df[ 91 | self.n_df["data"].apply(self.not_start_with_msg_words) 92 | ].copy() 93 | self.n_df_clean.loc[:, "data"] = self.n_df_clean["data"].apply( 94 | self.remove_bracketed_text_and_count_words 95 | ) 96 | self.n_df_clean = self.n_df_clean[self.n_df_clean["data"].apply(len) > 0] 97 | 98 | self.all_df_clean = self.all_df.copy() 99 | 100 | def find_time(self, df): 101 | """ 102 | 找出时间范围的年月 103 | :param df: 数据 104 | :return: 年月 105 | """ 106 | earliest_time = df.iloc[0]["time"] 107 | latest_time = df.iloc[-1]["time"] 108 | earliest_year, earliest_month, _ = earliest_time.split("-") 109 | latest_year, latest_month, _ = latest_time.split("-") 110 | return ( 111 | int(earliest_year), 112 | int(earliest_month), 113 | int(latest_year), 114 | int(latest_month), 115 | ) 116 | 117 | def find_month(self): 118 | """ 119 | 找到分析的月份列表 120 | :return: 月份列表 121 | """ 122 | months = [] 123 | 124 | # 当前的年份和月份 125 | current_year, current_month = self.s_year, self.s_month 126 | 127 | # 循环直到当前年月等于结束年月 128 | while (current_year, current_month) <= (self.e_year, self.e_month): 129 | # 将当前年月添加到列表中 130 | months.append((current_year, current_month)) 131 | 132 | # 如果当前月份是12月,则进入下一年的1月 133 | if current_month == 12: 134 | current_year += 1 135 | current_month = 1 136 | else: 137 | # 否则,月份增加1 138 | current_month += 1 139 | 140 | return months 141 | 142 | # 表情包分析 143 | 144 | def not_start_with_msg(self, value): 145 | """ 146 | 判断是否不以<开头 147 | :param value: 文本 148 | :return: 不以<开头为true,否则flase 149 | """ 150 | return not value.startswith("<") 151 | 152 | def start_with_msg(self, value): 153 | """ 154 | 判断是否以<开头 155 | :param value: 文本 156 | :return: 不以<开头为true,否则flase 157 | """ 158 | return value.startswith("<") 159 | 160 | def extract_androidmd5(self, text): 161 | """ 162 | 提取图片文件的androidmd5码用于分析图片种类 163 | :param text:图片文件 164 | :return:提取结果 165 | """ 166 | match = search(r'androidmd5="([^"]*)"', text) 167 | return match.group(1) if match else None 168 | 169 | def process_biaoqingbao(self): 170 | """ 171 | 分析表情包 172 | :return: 处理后的数据和结果 173 | """ 174 | # 分离表情包 175 | j_df_bqb = self.j_df[self.j_df["data"].apply(self.start_with_msg)] 176 | n_df_bqb = self.n_df[self.n_df["data"].apply(self.start_with_msg)] 177 | self.j_df = self.j_df[self.j_df["data"].apply(self.not_start_with_msg)] 178 | self.n_df = self.n_df[self.n_df["data"].apply(self.not_start_with_msg)] 179 | 180 | # 处理数据 181 | j_df_bqb = j_df_bqb.copy() 182 | j_df_bqb["data"] = j_df_bqb["data"].apply(self.extract_androidmd5) 183 | 184 | n_df_bqb = n_df_bqb.copy() 185 | n_df_bqb["data"] = n_df_bqb["data"].apply(self.extract_androidmd5) 186 | 187 | # 统计表情包 188 | value_counts = j_df_bqb["data"].value_counts() 189 | j_df_bqb = value_counts.reset_index() 190 | j_df_bqb.columns = ["data", "count"] 191 | value_counts = n_df_bqb["data"].value_counts() 192 | n_df_bqb = value_counts.reset_index() 193 | n_df_bqb.columns = ["data", "count"] 194 | 195 | j_df_bqb = j_df_bqb.sort_values(by="count", ascending=False) 196 | n_df_bqb = n_df_bqb.sort_values(by="count", ascending=False) 197 | 198 | save_data = [j_df_bqb, n_df_bqb] 199 | save_path = [ 200 | "./用户数据/data/bqb/" + self.name1 + "_bqb.xlsx", 201 | "./用户数据/data/bqb/" + self.name2 + "_bqb.xlsx", 202 | ] 203 | self.sa.save_data_all(save_data, save_path) 204 | 205 | max_count_j = max(j_df_bqb["count"]) 206 | max_count_n = max(n_df_bqb["count"]) 207 | max_count = max(max_count_j, max_count_n) 208 | 209 | self.d.draw_bqb(j_df_bqb, n_df_bqb, int(max_count + 5)) 210 | 211 | # emoji分析 212 | 213 | def remove_bracketed_text_and_count_j(self, s): 214 | """ 215 | 删除所有name1聊天记录[]中文字,统计emoji 216 | :param s:语句 217 | :return:删除后的语句 218 | """ 219 | # 使用正则表达式找到所有被 "[]" 包围的文字 220 | bracketed_texts = findall(r"\[(.*?)\]", s) 221 | 222 | # 更新统计字典 223 | for text in bracketed_texts: 224 | self.emoji_j[text] += 1 225 | 226 | # 删除被 "[]" 包围的文字 227 | return sub(r"\[.*?\]", "", s) 228 | 229 | def remove_bracketed_text_and_count_l(self, s): 230 | """ 231 | 删除所有name2聊天记录[]中文字,统计emoji 232 | :param s:语句 233 | :return:删除后的语句 234 | """ 235 | # 使用正则表达式找到所有被 "[]" 包围的文字 236 | bracketed_texts = findall(r"\[(.*?)\]", s) 237 | 238 | # 更新统计字典 239 | for text in bracketed_texts: 240 | self.emoji_l[text] += 1 241 | 242 | # 删除被 "[]" 包围的文字 243 | return sub(r"\[.*?\]", "", s) 244 | 245 | def sort_dicts(self, dict1, dict2): 246 | """ 247 | 将量字典归为并集,并排序 248 | :param dict1: 字典一 249 | :param dict2: 字典二 250 | :return:每个字典的全集并排序 251 | """ 252 | # 合并两个字典的键并去重 253 | all_keys = set(dict1.keys()) | set(dict2.keys()) 254 | # 对键进行排序 255 | sorted_keys = sorted(all_keys) 256 | # 创建两个新字典,按排序后的键存放键对 257 | sorted_dict1 = {key: dict1.get(key, None) for key in sorted_keys} 258 | sorted_dict2 = {key: dict2.get(key, None) for key in sorted_keys} 259 | 260 | return sorted_dict1, sorted_dict2 261 | 262 | def process_emoji(self): 263 | """ 264 | 统计两个人的emoji使用情况 265 | :return: 返回处理好的聊天记录和得到的分析数据 266 | """ 267 | # 统计和删除emoji 268 | self.j_df.loc[:, "data"] = self.j_df["data"].apply( 269 | self.remove_bracketed_text_and_count_j 270 | ) 271 | self.n_df.loc[:, "data"] = self.n_df["data"].apply( 272 | self.remove_bracketed_text_and_count_l 273 | ) 274 | 275 | # 清洗数据 276 | self.j_df = self.j_df[self.j_df["data"].apply(len) > 0] 277 | self.n_df = self.n_df[self.n_df["data"].apply(len) > 0] 278 | 279 | # 合并两个字典的键并去重 280 | all_keys = set(self.emoji_j.keys()).union(set(self.emoji_l.keys())) 281 | for key in all_keys: 282 | self.emoji_j.setdefault(key, 0) 283 | self.emoji_l.setdefault(key, 0) 284 | 285 | # 转化为df 286 | emoji_df_j = pd.DataFrame(list(self.emoji_j.items()), columns=["data", "count"]) 287 | emoji_df_l = pd.DataFrame(list(self.emoji_l.items()), columns=["data", "count"]) 288 | 289 | # 排序 290 | emoji_df_j = emoji_df_j.sort_values(by="count", ascending=False) 291 | emoji_df_l = emoji_df_l.sort_values(by="count", ascending=False) 292 | 293 | # 按照一个顺序排列 294 | emoji_j_sorted, emoji_l_sorted = self.sort_dicts(self.emoji_j, self.emoji_l) 295 | 296 | # 保存 297 | save_data = [emoji_df_j, emoji_df_l] 298 | save_path = [ 299 | "./用户数据/data/emoji/" + self.name1 + "_emoji.xlsx", 300 | "./用户数据/data/emoji/" + self.name2 + "_emoji.xlsx", 301 | ] 302 | self.sa.save_data_all(save_data, save_path) 303 | 304 | max_count = max(max(emoji_df_j["count"]), max(emoji_df_l["count"])) 305 | 306 | self.d.draw_emoji(emoji_j_sorted, emoji_l_sorted, int(max_count + 5)) 307 | 308 | # 词语分析 309 | 310 | def change_shape(self, shape1, shape2, shape3): 311 | self.shape1 = shape1 312 | self.shape2 = shape2 313 | self.shape3 = shape3 314 | 315 | def process_words(self, mode): 316 | """ 317 | 分析语句 318 | :param mode: 分析模式 319 | :return: 无 320 | """ 321 | 322 | if mode == self.name1 + self.name2: 323 | data_words = self.all_df_clean["data"].copy() 324 | shape = self.shape3 325 | title = "两个人" 326 | pass 327 | elif mode == self.name2: 328 | data_words = self.n_df_clean["data"].copy() 329 | shape = self.shape2 330 | title = mode 331 | pass 332 | elif mode == self.name1: 333 | data_words = self.j_df_clean["data"].copy() 334 | shape = self.shape1 335 | title = mode 336 | pass 337 | else: 338 | print("参数错误,退出") 339 | return 340 | ans = {} 341 | for d in data_words: 342 | words = cut(d, cut_all=False) 343 | for w in words: 344 | if w in ans and len(w) > 1: 345 | ans[w] += 1 346 | elif len(w) > 1: 347 | ans[w] = 1 348 | sorted_ans = sorted(ans.items(), key=lambda x: x[1], reverse=True) 349 | ans.clear() 350 | for data in sorted_ans: 351 | ans[data[0]] = data[1] 352 | self.words = pd.DataFrame(list(ans.items()), columns=["data", "counts"]) 353 | # self.words = self.words[self.words["data"].apply(lambda s: s != "主人")] 354 | self.words.dropna(subset=["data"]) 355 | self.words = self.words[self.words["data"].apply(len) > 1] 356 | sava_data = [self.words.copy()] 357 | sava_path = ["./用户数据/data/word/" + title + "_words_counts.xlsx"] 358 | self.sa.save_data_all(sava_data, sava_path) 359 | self.d.draw_word_cloud(self.words.copy(), shape, title) 360 | 361 | # 分析热度 362 | 363 | def get_max_count_date(self): 364 | date_df = self.all_df.copy() 365 | date_df["time"] = pd.to_datetime(date_df["time"]) 366 | date_df["date"] = date_df["time"].dt.date 367 | s_date, e_date = ca.get_month_dates( 368 | self.s_year, self.s_month, self.e_year, self.e_month 369 | ) 370 | date_range = pd.date_range(start=s_date, end=e_date) 371 | date_counts = date_df.groupby("date").size().reindex(date_range, fill_value=0) 372 | date_counts = date_counts.to_frame() 373 | date_counts = date_counts.reset_index() 374 | date_counts.columns = ["time", "counts"] 375 | date_counts["week_day"] = date_counts["time"].apply(lambda s: s.weekday() + 1) 376 | date_counts.sort_values(by="time") 377 | date_counts["month"] = date_counts["time"].dt.month 378 | date_counts["year"] = date_counts["time"].dt.year 379 | date_dfs = [group for _, group in date_counts.groupby(["year", "month"])] 380 | max_count = max(date_counts["counts"]) + 50 381 | self.max_heat_date = max_count 382 | 383 | def make_rili_df(self, date_counts): 384 | """ 385 | 生成日历df 386 | :param date_counts: 每天的count 387 | :return: 结果 388 | """ 389 | days = ["0", "星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"] 390 | columns_date_df = [days[i] for i in range(1, 8)] 391 | df_new = pd.DataFrame(columns=columns_date_df) 392 | temp_row = [0] * 7 393 | is_has = False 394 | for _, row_old in date_counts.iterrows(): 395 | time = row_old["week_day"] - 1 # 将时间1-7转换为索引0-6 396 | count = row_old["counts"] 397 | temp_row[time] = count 398 | is_has = True 399 | if time == 6: # 如果填充到了星期日,添加行到df_new 400 | df_new = pd.concat( 401 | [df_new, pd.DataFrame([temp_row], columns=columns_date_df)], 402 | ignore_index=True, 403 | ) 404 | temp_row = [0] * 7 405 | is_has = False 406 | # 处理最后一行 407 | if is_has: # 如果最后一行不是全0 408 | df_new = pd.concat( 409 | [df_new, pd.DataFrame([temp_row], columns=columns_date_df)], 410 | ignore_index=True, 411 | ) 412 | # 输出新的DataFrame 413 | return df_new 414 | 415 | def make_masks( 416 | self, 417 | ): 418 | """ 419 | 制作遮罩 420 | :return: 结果 421 | """ 422 | ans = [] 423 | for year, month in self.months: 424 | now_rili = ca.generate_calendar(year, month) 425 | ans.append(now_rili) 426 | return ans 427 | 428 | def process_heat(self, mode): 429 | """ 430 | 分析聊天热度 431 | :param mode: 分析模式 432 | :return: 无 433 | """ 434 | if mode == self.name1 + self.name2: 435 | date_df = self.all_df.copy() 436 | title = "两个人" 437 | pass 438 | elif mode == self.name1: 439 | date_df = self.j_df.copy() 440 | title = mode 441 | pass 442 | elif mode == self.name2: 443 | date_df = self.n_df.copy() 444 | title = mode 445 | pass 446 | else: 447 | print("参数错误,退出") 448 | return 449 | date_df["time"] = pd.to_datetime(date_df["time"]) 450 | date_df["date"] = date_df["time"].dt.date 451 | s_date, e_date = ca.get_month_dates( 452 | self.s_year, self.s_month, self.e_year, self.e_month 453 | ) 454 | date_range = pd.date_range(start=s_date, end=e_date) 455 | date_counts = date_df.groupby("date").size().reindex(date_range, fill_value=0) 456 | date_counts = date_counts.to_frame() 457 | date_counts = date_counts.reset_index() 458 | date_counts.columns = ["time", "counts"] 459 | date_counts["week_day"] = date_counts["time"].apply(lambda s: s.weekday() + 1) 460 | 461 | date_counts.sort_values(by="time") 462 | date_counts["month"] = date_counts["time"].dt.month 463 | date_counts["year"] = date_counts["time"].dt.year 464 | date_dfs = [group for _, group in date_counts.groupby(["year", "month"])] 465 | rili_dfs = [self.make_rili_df(df) for df in date_dfs] 466 | mask = self.make_masks() 467 | 468 | date_counts_no_zeros = date_counts[ 469 | date_counts["counts"].apply(lambda s: s != 0) 470 | ] 471 | 472 | self.d.draw_heat_how(date_counts_no_zeros, title, self.max_heat_date) 473 | 474 | self.d.draw_heatmap_big( 475 | rili_dfs, 476 | title, 477 | mask, 478 | self.months_size, 479 | self.months, 480 | self.max_heat_date, 481 | ) 482 | self.d.draw_heatmap_all( 483 | rili_dfs, 484 | title, 485 | mask, 486 | self.months_size, 487 | self.months, 488 | self.max_heat_date, 489 | ) 490 | 491 | # 分析聊天时间 492 | 493 | def get_max_count_time(self): 494 | hour_df = self.all_df.copy() 495 | hour_df["time"] = pd.to_datetime(hour_df["time"]) 496 | hour_df["hour"] = hour_df["time"].dt.hour 497 | hour_counts = hour_df.groupby("hour").size().reindex(range(0, 24), fill_value=0) 498 | hour_counts = hour_counts.to_frame() 499 | hour_counts = hour_counts.reset_index() 500 | hour_counts.columns = ["hour", "counts"] 501 | hour_counts.sort_values(by="hour") 502 | columns_date_df = [str(i) for i in range(0, 24)] 503 | hour_df_image = pd.DataFrame(columns=columns_date_df) 504 | temp_row = [0] * 24 505 | for index, row in hour_counts.iterrows(): 506 | temp_row[index] = row["counts"] 507 | hour_df_image = pd.concat( 508 | [hour_df_image, pd.DataFrame([temp_row], columns=columns_date_df)], 509 | ignore_index=True, 510 | ) 511 | max_count = hour_df_image.max(axis=1).values[0] 512 | self.max_count_time = max_count 513 | 514 | def process_time(self, mode): 515 | """ 516 | 分析聊天热度时间 517 | :param mode: 分析模式 518 | :return: 无 519 | """ 520 | if mode == self.name1 + self.name2: 521 | hour_df = self.all_df.copy() 522 | title = "两个人" 523 | pass 524 | elif mode == self.name1: 525 | hour_df = self.j_df.copy() 526 | title = mode 527 | pass 528 | elif mode == self.name2: 529 | hour_df = self.n_df.copy() 530 | title = mode 531 | pass 532 | else: 533 | print("参数错误,退出") 534 | return 535 | hour_df["time"] = pd.to_datetime(hour_df["time"]) 536 | hour_df["hour"] = hour_df["time"].dt.hour 537 | hour_counts = hour_df.groupby("hour").size().reindex(range(0, 24), fill_value=0) 538 | hour_counts = hour_counts.to_frame() 539 | hour_counts = hour_counts.reset_index() 540 | hour_counts.columns = ["hour", "counts"] 541 | hour_counts.sort_values(by="hour") 542 | columns_date_df = [str(i) for i in range(0, 24)] 543 | hour_df_image = pd.DataFrame(columns=columns_date_df) 544 | temp_row = [0] * 24 545 | for index, row in hour_counts.iterrows(): 546 | temp_row[index] = row["counts"] 547 | hour_df_image = pd.concat( 548 | [hour_df_image, pd.DataFrame([temp_row], columns=columns_date_df)], 549 | ignore_index=True, 550 | ) 551 | self.d.draw_time_heat(hour_df_image, title, self.max_count_time) 552 | 553 | # 分析情感 554 | 555 | def analyse_word(self, s): 556 | """ 557 | 分析情感 558 | :param s: 559 | :return: 560 | """ 561 | # print("-------") 562 | # print(s) 563 | sleep(self.QPS) 564 | result = self.client.sentimentClassify(s) # 调用api 565 | # print("-------") 566 | return result 567 | 568 | def save_emotion(self, mode): 569 | """ 570 | 生成情感分析文件 571 | :return: 无 572 | """ 573 | if mode == self.name1 + self.name2: 574 | emo_df = self.all_df_clean.copy() 575 | title = "全部" 576 | pass 577 | elif mode == self.name1: 578 | emo_df = self.j_df_clean.copy() 579 | title = mode 580 | pass 581 | elif mode == self.name2: 582 | emo_df = self.n_df_clean.copy() 583 | title = mode 584 | pass 585 | else: 586 | print("参数错误,退出") 587 | return 588 | emo_df["emo"] = emo_df["data"].apply(self.analyse_word) 589 | path = "./用户数据/data/" + title + "情感分析.xlsx" 590 | emo_df.to_excel(path, index=False) 591 | 592 | def is_items(self, s): 593 | """ 594 | 清洗错误数据 595 | :param s: 文本 596 | :return: 是否正确 597 | """ 598 | return "items" in s 599 | 600 | def get_sentiment(self, s): 601 | """ 602 | 获取倾向 603 | :param s:结论 604 | :return: 倾向 605 | """ 606 | match = search(r"'sentiment': (\d+)", s) 607 | return int(match.group(1)) if match else None 608 | 609 | def get_api(self, q, a1, a2, a3): 610 | QPS = int(q) 611 | if QPS >= 20: 612 | self.QPS = 0 613 | else: 614 | self.QPS = 1.0 / QPS 615 | self.a1 = a1 616 | self.a2 = a2 617 | self.a3 = a3 618 | 619 | def process_emo(self, mode): 620 | """ 621 | 分析情感 622 | :param mode: 分析模式 623 | :return: 无 624 | """ 625 | if mode == self.name1 + self.name2: 626 | title = "全部" 627 | pass 628 | elif mode == self.name1: 629 | title = self.name1 630 | pass 631 | elif mode == self.name2: 632 | title = self.name2 633 | pass 634 | else: 635 | print("参数错误,退出") 636 | return 637 | App_ID = self.a1 638 | API_KEY = self.a2 639 | SECRET_KEY = self.a3 640 | api_path = "./用户数据/api/api.txt" 641 | text = App_ID + "\n" + API_KEY + "\n" + SECRET_KEY + "\n" 642 | with open(api_path, "w") as file: 643 | file.write(text) 644 | self.client = AipNlp(App_ID, API_KEY, SECRET_KEY) 645 | print(App_ID) 646 | print(API_KEY) 647 | print(SECRET_KEY) 648 | self.save_emotion(mode) 649 | path = "./用户数据/data/" + title + "情感分析.xlsx" 650 | emo_df = pd.read_excel(path) 651 | emo_df = emo_df[emo_df["emo"].apply(self.is_items)] 652 | emo_df["emo_rank"] = emo_df["emo"].apply(self.get_sentiment) 653 | emo_rank_counts = ( 654 | emo_df.groupby("emo_rank").size().reindex(range(0, 3), fill_value=0) 655 | ) 656 | emo_rank_counts = emo_rank_counts.to_frame() 657 | emo_rank_counts = emo_rank_counts.reset_index() 658 | emo_rank_counts.columns = ["rank", "counts"] 659 | emo_rank_counts.sort_values(by="rank") 660 | self.d.draw_emo(emo_rank_counts, title) 661 | 662 | # 保存数据 663 | def save_kinds_of_data(self): 664 | """ 665 | 保存数据 666 | :return:无 667 | """ 668 | d_data = [self.n_df.copy(), self.j_df.copy(), self.all_df.copy()] 669 | d_path = [ 670 | "./用户数据/data/" + self.name2 + ".xlsx", 671 | "./用户数据/data/" + self.name1 + ".xlsx", 672 | "./用户数据/data/all.xlsx", 673 | ] 674 | self.sa.save_data_all(d_data, d_path) 675 | -------------------------------------------------------------------------------- /仓耳与墨 W03.TTF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/仓耳与墨 W03.TTF -------------------------------------------------------------------------------- /使用教程/使用教程.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/使用教程/使用教程.pdf -------------------------------------------------------------------------------- /使用教程/词云形状自定义教程.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oorangeeee/Message_Analysis/2dc1283915f2905e401c4e407f1985bae47c32a9/使用教程/词云形状自定义教程.pdf --------------------------------------------------------------------------------