├── .editorconfig ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── custom.md │ └── feature_request.md ├── .gitignore ├── LICENSE ├── README.md ├── images ├── grafana.jpeg ├── influxdb.jpeg ├── nginx_log_processing_monitoring.png ├── structure_of_the_data.png └── web_log_anlyzst.jpg └── script ├── analyzer_multiprocessing.py ├── analyzer_threading.py └── log_distribution.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | 4 | # setting for all files 5 | [*] 6 | # character set 7 | charset = utf-8 8 | # line ending style 9 | end_of_line = lf 10 | # whether to insert a blank line at the end of a file 11 | insert_final_newline = true 12 | # whether to delete the space at the end of a line 13 | trim_trailing_whitespace = false 14 | 15 | # python 16 | [*.py] 17 | indent_style = space 18 | indent_size = 4 19 | 20 | # shell 21 | [*.sh] 22 | indent_style = space 23 | indent_size = 4 24 | 25 | # javascript 26 | [*.js] 27 | indent_style = space 28 | indent_size = 2 29 | 30 | # markdown 31 | [*.md] 32 | indent_style = tab 33 | 34 | # json or .travis.yml 35 | [*.json] 36 | indent_style = space 37 | indent_size = 2 38 | 39 | # yml 40 | [*.yml] 41 | indent_style = space 42 | indent_size = 2 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 1. Go to '...' 13 | 2. Click on '....' 14 | 3. Scroll down to '....' 15 | 4. See error 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 
19 | 20 | **Screenshots** 21 | If applicable, add screenshots to help explain your problem. 22 | 23 | **Desktop (please complete the following information):** 24 | - OS: [e.g. iOS] 25 | - Browser [e.g. chrome, safari] 26 | - Version [e.g. 22] 27 | 28 | **Smartphone (please complete the following information):** 29 | - Device: [e.g. iPhone6] 30 | - OS: [e.g. iOS8.1] 31 | - Browser [e.g. stock browser, safari] 32 | - Version [e.g. 22] 33 | 34 | **Additional context** 35 | Add any other context about the problem here. 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 4 | 5 | --- 6 | 7 | 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | ### JetBrains template 93 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 94 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 95 | 96 | # User-specific stuff: 97 | 
.idea/workspace.xml 98 | .idea/tasks.xml 99 | .idea/dictionaries 100 | .idea/vcs.xml 101 | .idea/jsLibraryMappings.xml 102 | 103 | # Sensitive or high-churn files: 104 | .idea/dataSources.ids 105 | .idea/dataSources.xml 106 | .idea/dataSources.local.xml 107 | .idea/sqlDataSources.xml 108 | .idea/dynamic.xml 109 | .idea/uiDesigner.xml 110 | 111 | # Gradle: 112 | .idea/gradle.xml 113 | .idea/libraries 114 | 115 | # Mongo Explorer plugin: 116 | .idea/mongoSettings.xml 117 | 118 | ## File-based project format: 119 | *.iws 120 | 121 | ## Plugin-specific files: 122 | 123 | # IntelliJ 124 | /out/ 125 | 126 | # mpeltonen/sbt-idea plugin 127 | .idea_modules/ 128 | 129 | # JIRA plugin 130 | atlassian-ide-plugin.xml 131 | 132 | # Crashlytics plugin (for Android Studio and IntelliJ) 133 | com_crashlytics_export_strings.xml 134 | crashlytics.properties 135 | crashlytics-build.properties 136 | fabric.properties 137 | 138 | # Vscode dot dir 139 | .vscode/ 140 | 141 | # Mac dot file 142 | .DS_Store 143 | 144 | # Pyenv python file 145 | .python-version 146 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Escape 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | Raspi-X 4 | 5 |

6 | 7 |

8 | web-log-analyst 🐌 Nginx日志分析处理监控 9 |

10 | 11 |

12 | love_girlfriend 13 | love_girlfriend 14 | love_girlfriend 15 |

16 | 17 | ## 项目目录 18 | 19 | - [1. 功能描述](https://github.com/EscapeLife/web-log-analyst#1-%E5%8A%9F%E8%83%BD%E6%8F%8F%E8%BF%B0) 20 | - [2. 项目流程图](https://github.com/EscapeLife/web-log-analyst#2-%E9%A1%B9%E7%9B%AE%E6%B5%81%E7%A8%8B%E5%9B%BE) 21 | - [3. 模块分析过程](https://github.com/EscapeLife/web-log-analyst#3%E6%A8%A1%E5%9D%97%E5%88%86%E6%9E%90%E8%BF%87%E7%A8%8B) 22 | - [3.1 解析日志文件](https://github.com/EscapeLife/web-log-analyst#31-%E8%A7%A3%E6%9E%90%E6%97%A5%E5%BF%97%E6%96%87%E4%BB%B6) 23 | - [3.2 分析日志文件](https://github.com/EscapeLife/web-log-analyst#32-%E5%88%86%E6%9E%90%E6%97%A5%E5%BF%97%E6%96%87%E4%BB%B6) 24 | - [3.3 数据结构](https://github.com/EscapeLife/web-log-analyst#33-%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84) 25 | - [3.4 展示日志文件](https://github.com/EscapeLife/web-log-analyst#34-%E5%B1%95%E7%A4%BA%E6%97%A5%E5%BF%97%E6%96%87%E4%BB%B6) 26 | - [4. 效果展示](https://github.com/EscapeLife/web-log-analyst#4-%E6%95%88%E6%9E%9C%E5%B1%95%E7%A4%BA) 27 | - [5. 快速构建]() 28 | 29 | ## 1. 功能描述 30 | 31 | > **小工具功能用途的简单描述** 32 | 33 | 一个简单的日志分发、存储、监控、展示的小工具,当然你也可以将任何你关注的信息,集中到这里进行展示。其中 `log_distribution.py` 用来分发日志数据,变相地实现了多日志监控的效果;`analyzer_threading.py` 和 `analyzer_multiprocessing.py` 是利用进程以及线程的特性,对日志进行了收集、聚合、发送的效果(在 `python` 中,`threading` 多线程都是运行在一个进程当中的,所以对于多核 `CPU` 并没有发挥其性能;相对而言,`multiprocessing` 多进程可以充分利用操作系统的性能)。将数据存储在 `influxdb` 流式数据库中,并利用 `grafana` 进行展示,当然这里你也可以使用其他的数据库展示工具。 34 | 35 | ## 2. 
项目流程图 36 | 37 | > **通过流程图直观地解释项目的分析过程** 38 | 39 | ![Nginx 日志分析处理监控流程图](./images/nginx_log_processing_monitoring.png) 40 | 41 | ## 3.模块分析过程 42 | 43 | > **注释:没有提供所需的 Nginx 日志文件,使用时需要结合自己的真实适用场景进行合理规划。** 44 | 45 | ### 3.1 解析日志文件 46 | 47 | 对于 `Web` 服务器的日志输出,是由我们指定的,既可以更多的记录站点的信息也可以大致的做一下记录。当然,最重要的信息肯定也是必不可少的。如下,列出了比较重要的日志记录信息。 48 | 49 | - `IP` 地址 50 | - 用户访问时间戳 51 | - 请求方式 52 | - `URL` 地址 53 | - 浏览器版本 54 | - 服务器返回状态 55 | - 返回字节长度 56 | - `referer` 57 | - `UserAgent` 58 | 59 | ### 3.2 分析日志文件 60 | 61 | 通过对于 `Nginx` 日志的分析,我们可以得到很多东西,不只是我下面表格列出的这几条。分析得到的结果,有助于我们优化网站性能、统计网站访问量以及进行实时展示。 62 | 63 | | 编号 | 获取到的内容 | 如何统计 | 如何展示 | 64 | | ---- | ---------------- | -------- | ---------------------- | 65 | | 1 | 平均时间的流量 | sum | line chart | 66 | | 2 | 平均响应时间 | sum | line chart | 67 | | 3 | top 10 URL | count | bar chart | 68 | | 4 | top 10 IP | count | bar chart | 69 | | 5 | top 10 UserAgent | count | bar chart | 70 | | 6 | 响应状态的分布 | group by | pie chart or stacked chart | 71 | | 7 | UserAgent 的分布 | group by | pie chart or stacked chart | 72 | | 8 | HTTP 版本的分布 | group by | pie chart or stacked chart | 73 | 74 | ### 3.3 数据结构 75 | 76 | 为了便于理解和学习,下图示例了对应的数据结构。 77 | 78 | ![数据结构](./images/structure_of_the_data.png) 79 | 80 | ### 3.4 展示日志文件 81 | 82 | #### 3.4.1 influxdb 83 | 84 | **用途** 85 | 86 | - `InfluxDB` 是用 `Go` 语言编写的一个开源分布式时序、事件和指标数据库,和传统的数据库相比有不少不同的地方 87 | - 类似的数据库有 `Elasticsearch`、`Graphite` 等 88 | - 一般用来储存实时数据,配合一套 `UI` 界面来展示信息 89 | 90 | **安装** 91 | 92 | ```bash 93 | # 配置yum安装源 94 | cat < **使用 Docker 进行快速构建** 153 | 154 | ```bash 155 | # 生成数据 156 | $ python3 script/log_distribution.py data/grpc.access.log egg/1.log egg/2.log egg/3.log 157 | 158 | # 解析数据并推送到influxdb里面 159 | $ python3 script/analyzer_multiprocessing.py egg/1.log egg/2.log egg/3.log 160 | $ python3 script/analyzer_threading.py egg/1.log egg/2.log egg/3.log 161 | ``` 162 | 163 | ## 6. 联系方式 164 | 165 |

166 | WX 167 |

168 | 169 | - **💭 [Name][^1] 💭** 170 | - 🐠 **[`EscapeLife`](https://www.escapelife.site)** 😏 171 | - **💭 [Induction][^2] 💭** 172 | - 🏦 **[`Focusing P.A.I`](https://www.paodingai.com)** 😂 173 | - **💭 [Email][^3] 💭** 174 | - 📫 **[`wenpanhappy@gmail.com`](mailto:wenpanhappy@gmail.com)** 🤔 175 | - **💭 [Myblog][^4] 💭** 176 | - 🍺 **[`https://www.escapelife.site`](https://www.escapelife.site)** 😚 177 | - **💭 [License][^5] 💭** 178 | - 🚧 [**`MIT License`**](./LICENSE)😝 179 | 180 | [^1]: This is my nickname. 181 | [^2]: This is my company address. 182 | [^3]: This is my public mailbox. 183 | [^4]: This is my blog. 184 | [^5]: The open source license used by the repository. 185 | -------------------------------------------------------------------------------- /images/grafana.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EscapeLife/web-log-analyst/ae7561236fe44cac950e44f401256e6ac35c05f7/images/grafana.jpeg -------------------------------------------------------------------------------- /images/influxdb.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EscapeLife/web-log-analyst/ae7561236fe44cac950e44f401256e6ac35c05f7/images/influxdb.jpeg -------------------------------------------------------------------------------- /images/nginx_log_processing_monitoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EscapeLife/web-log-analyst/ae7561236fe44cac950e44f401256e6ac35c05f7/images/nginx_log_processing_monitoring.png -------------------------------------------------------------------------------- /images/structure_of_the_data.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EscapeLife/web-log-analyst/ae7561236fe44cac950e44f401256e6ac35c05f7/images/structure_of_the_data.png -------------------------------------------------------------------------------- /images/web_log_anlyzst.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EscapeLife/web-log-analyst/ae7561236fe44cac950e44f401256e6ac35c05f7/images/web_log_anlyzst.jpg -------------------------------------------------------------------------------- /script/analyzer_multiprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | import datetime 5 | import threading 6 | import requests 7 | import multiprocessing 8 | 9 | compile = re.compile(r'(?P\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) .* .* \[(?P