├── .gitignore ├── LICENSE ├── README.md ├── agent.php ├── init.sh └── logstash.php /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .idea/* 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # php-logstash 2 | php实现的轻量级日志文件监控 3 | 4 | 说明 5 | ------ 6 | 7 | 通过这个轻巧的脚本可以很容易的将日志送到 elasticsearch 中,并且本地测试处理能力基本保持在接近1w/s的速度。 8 | 9 | 脚本主要实现两个功能,输入和输出。 10 | 11 | ### 输入 12 | 13 | php agent.php --listen=case.log 用来监听访问日志的变更 14 | 15 | 或者使用命令 tailf case.log | php agent.php --listen 来监听来自 stdin 的输入。 16 | 17 | 该功能会持续将监听到的变更记入Redis队列中同时格式化将要记录的Log。 18 | 19 | ### 输出 20 | 21 | php agent.php --indexer 用来建立索引,该脚本每秒约索引8千左右,也可开多个并行处理。 22 | 23 | 该功能会持续将Redis队列中的数据导入 ElasticSearch 数据库中。 24 | 25 | ### 调试 26 | 27 | php logstash.php --build=1 在本地生成的 case.log 中追加一条log。 28 | 29 | ## 依赖 30 | 31 | * PHP 5.4.0 + 32 | * redis 扩展 33 | * curl 扩展 34 | 35 | ## 使用方法说明 36 | 37 | ### 输入方式 38 | 39 | php agent.php --listen= 从头读取文件并持续监听 40 | 41 | tail -F case.log | php agent.php --listen 监听 Stdin 传入的数据 42 | 43 | ### 索引方式 44 | 45 | php agent.php --indexer 46 | 47 | 可将以上命令放置在shell中执行 48 | 49 | ``` 50 | #/bin/bash 51 | nohup tail -F access.log | php agent.php --listen & 52 | 
nohup php agent.php --listen=case.log & 53 | nohup php agent.php --indexer & 54 | ``` 55 | 56 | 调试方式 57 | 58 | 程序提供了一个指令用来模拟日志写入 59 | 60 | ``` 61 | php logstash.php --build= #生成的log条目数,默认20万条 62 | 63 | 文件保存为case.log并且在同级目录下,可用命令 64 | 65 | tail -F case.log | php agent.php --listen 或 66 | 67 | php agent.php --listen=case.log 68 | 69 | 70 | 测试日志监听状态,并从redis中查看结果,或重新定义parser方法在内部中断调试日志解析过程 71 | ``` 72 | 73 | 全部指令 74 | 75 | ``` 76 | agent.php --listen= #将脚本设置为输入模式,用来监听日志文件输入 77 | 78 | agent.php --listen #不指定文件将监听来自 stdin 的输入 79 | 80 | agent.php --indexer #将脚本设置为索引模式,用来将队列的数据发送到 ElasticSearch 服务器 81 | 82 | agent.php --status #查看队列情况和处理速度 83 | 84 | ``` 85 | 86 | 87 | 88 | ## 配置文件 89 | 90 | ``` 91 | 全部配置文件如下,默认均有默认值 92 | [ 93 | 'redis' => 'tcp://127.0.0.1:6379', # redis地址,支持认证不支持数组。认证tcp://auth:密码@127.0.0.1:6379 94 | 'type' => 'log', # redis 队列key,及es的index type 95 | 'agent_log' => __DIR__ .'/agent.log', # 日志保存地址 96 | 'input_sync_memory' => 5*1024*1024, # 输入信息到达指定内存后同步 97 | 'input_sync_second' => 5, # 输入信息等待超过指定秒数后同步,以上2个条件满足任意一个即触发同步 98 | 'parser' => [$this,'parser'], # 自定义输入端日志的处理格式,默认与程序提供的logformat json一致 99 | 100 | 'elastic' => 'http://127.0.0.1:9200', # elastic search通信地址,支持数组,可配置多个随机访问 101 | # 支持密码 程序采用 http auth_basic 认证方式 102 | # 使用密码 http://user:password@127.0.0.1:9200 103 | 'prefix' => 'phplogstash', # es 默认索引前缀名字为 phplogstash-2015.12.12 104 | 'shards' => '5', # es 分片数量 105 | 'replicas' => '2', # es 副本数量 106 | ]; 107 | ``` 108 | 109 | 110 | ## 日志格式 111 | 112 | 程序默认使用如下Nginx的log_format,设置步骤如下 113 | 114 | 1、将如下 log_format 规则放置在 nginx 的 http 配置内 115 | 116 | ``` 117 | log_format json '{"timestamp":"$time_iso8601",' 118 | '"host":"$server_addr",' 119 | '"server":"$server_name",' 120 | '"client":"$http_x_forwarded_for",' 121 | '"size":$body_bytes_sent,' 122 | '"responsetime":$upstream_response_time,' 123 | '"domain":"$host",' 124 | '"method":"$request_method",' 125 | '"url":"$uri",' 126 | '"requesturi":"$request_uri",' 127 | '"via":"$server_protocol",' 128 | '"request":"$request",' 
129 | '"uagent":"$http_user_agent",' 130 | '"referer":"$http_referer",' 131 | '"status":"$status"}'; 132 | 133 | 如果是内网机器需要使用该变量获取真实IP $http_x_forwarded_for 134 | 135 | 2、将如下配置放置在 server 的配置内。 136 | 137 | access_log web_accesslog.json json; 138 | ``` 139 | 140 | 生成的日志格式如下,默认build的也是这种格式 141 | 142 | ``` 143 | { 144 | "timestamp": "2015-12-18T14:24:26+08:00", 145 | "host": "10.10.23.139", 146 | "message": "0", 147 | "server": "localhost", 148 | "client": "127.0.0.1", 149 | "size": 197, 150 | "responsetime": 0.010, 151 | "domain": "www.localhost.com", 152 | "method": "GET", 153 | "url": "/index.php", 154 | "requesturi": "/controller/action?arg1=1&arg2=2", 155 | "via": "HTTP/1.1", 156 | "request": "GET /controller/action?arg1=1&arg2=2 HTTP/1.1", 157 | "uagent": "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", 158 | "referer": "-", 159 | "status": "200" 160 | } 161 | ``` 162 | 163 | 默认的 parser 会把 requesturi 字段分解成 resquesturi 与 args,然后提交给elasticsearch方便汇总查看,如果不需要这么详细的拆分请直接使用request字段即可。 164 | 165 | ``` 166 | Array 167 | ( 168 | [timestamp] => 2015-12-18T14:24:26+08:00 169 | [host] => 10.10.23.139 170 | [message] => 0 171 | [server] => localhost 172 | [client] => 127.0.0.1 173 | [size] => 197 174 | [responsetime] => 0.01 175 | [domain] => www.localhost.com 176 | [method] => GET 177 | [url] => /index.php 178 | [requesturi] => /controller/action?arg1=1&arg2=2 179 | [via] => HTTP/1.1 180 | [uagent] => Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) 181 | [referer] => - 182 | [status] => 200 183 | [resquesturi] => /controller/action 184 | [args] => Array 185 | ( 186 | [arg1] => 1 187 | [arg2] => 2 188 | ) 189 | ) 190 | ``` 191 | 192 | ## 日志的归档与轮转 193 | 194 | 以下是 logrotate 的配置信息,放置在 /etc/logrotate.d 目录下,即可每日将.json 后缀的日志进行归档。 195 | 196 | ``` 197 | /data/logs/*.json { 198 | daily 199 | missingok 200 | rotate 10 201 | dateext 202 | compress 203 | delaycompress 204 | notifempty 205 | create 644 www-data.www-data 206 | sharedscripts 
207 | prerotate 208 | if [ -d /etc/logrotate.d/httpd-prerotate ]; then \ 209 | run-parts /etc/logrotate.d/httpd-prerotate; \ 210 | fi \ 211 | endscript 212 | postrotate 213 | invoke-rc.d nginx rotate >/dev/null 2>&1 214 | endscript 215 | } 216 | ``` 217 | 218 | 以上配置为Ubuntu APT 安装的脚本,自定义路径安装的请使用如下配置 219 | 220 | ``` 221 | /data/logs/*.json { 222 | daily 223 | missingok 224 | rotate 10 225 | dateext 226 | compress 227 | delaycompress 228 | notifempty 229 | create 644 www-data.www-data 230 | sharedscripts 231 | postrotate 232 | /usr/local/nginx/sbin/nginx -s reload 233 | endscript 234 | } 235 | ``` 236 | 237 | 238 | ##测试 239 | 240 | ``` 241 | 242 | 插入测试 每秒1-2.5w每秒插入 243 | QPS为负表示队列增加速度,为正表示队列处理速度。 244 | ========time========|========keys========|===QPS====| 245 | 2015-12-21 14:17:02 | 0 | NA | 246 | ========time========|========keys========|===QPS====| 247 | 2015-12-21 14:17:03 | 0 | NA | 248 | ========time========|========keys========|===QPS====| 249 | 2015-12-21 14:17:04 | 0 | NA | 250 | ========time========|========keys========|===QPS====| 251 | 2015-12-21 14:17:05 | 18901 | NA | 252 | ========time========|========keys========|===QPS====| 253 | 2015-12-21 14:17:06 | 46972 | -28071/s | 254 | ========time========|========keys========|===QPS====| 255 | 2015-12-21 14:17:07 | 75618 | -28646/s | 256 | ========time========|========keys========|===QPS====| 257 | 2015-12-21 14:17:08 | 102306 | -26688/s | 258 | ========time========|========keys========|===QPS====| 259 | 2015-12-21 14:17:09 | 130091 | -27785/s | 260 | ========time========|========keys========|===QPS====| 261 | 2015-12-21 14:17:10 | 155433 | -25342/s | 262 | ========time========|========keys========|===QPS====| 263 | 2015-12-21 14:17:11 | 181863 | -26430/s | 264 | ========time========|========keys========|===QPS====| 265 | 2015-12-21 14:17:12 | 199479 | -17616/s | 266 | ========time========|========keys========|===QPS====| 267 | 2015-12-21 14:17:13 | 199479 | 0/s | 268 | 
========time========|========keys========|===QPS====| 269 | 2015-12-21 14:17:14 | 199479 | 0/s | 270 | ========time========|========keys========|===QPS====| 271 | 2015-12-21 14:17:15 | 199479 | 0/s | 272 | ========time========|========keys========|===QPS====| 273 | 2015-12-21 14:17:16 | 199479 | 0/s | 274 | ========time========|========keys========|===QPS====| 275 | 2015-12-21 14:17:17 | 199479 | 0/s | 276 | ========time========|========keys========|===QPS====| 277 | 2015-12-21 14:17:18 | 200030 | -551/s | 278 | ========time========|========keys========|===QPS====| 279 | 索引测试 8k-1w每秒 280 | ========time========|========keys========|===QPS====| 281 | 2015-12-21 14:17:47 | 200030 | 0/s | 282 | ========time========|========keys========|===QPS====| 283 | 2015-12-21 14:17:48 | 197692 | 2338/s | 284 | ========time========|========keys========|===QPS====| 285 | 2015-12-21 14:17:49 | 186923 | 10769/s | 286 | ========time========|========keys========|===QPS====| 287 | 2015-12-21 14:17:50 | 178367 | 8556/s | 288 | ========time========|========keys========|===QPS====| 289 | 2015-12-21 14:17:51 | 168894 | 9473/s | 290 | ========time========|========keys========|===QPS====| 291 | 2015-12-21 14:17:52 | 158028 | 10866/s | 292 | ========time========|========keys========|===QPS====| 293 | 2015-12-21 14:17:53 | 149483 | 8545/s | 294 | ========time========|========keys========|===QPS====| 295 | 2015-12-21 14:17:54 | 139930 | 9553/s | 296 | ========time========|========keys========|===QPS====| 297 | 2015-12-21 14:17:55 | 129157 | 10773/s | 298 | ========time========|========keys========|===QPS====| 299 | 2015-12-21 14:17:56 | 120599 | 8558/s | 300 | ========time========|========keys========|===QPS====| 301 | 2015-12-21 14:17:57 | 111956 | 8643/s | 302 | ========time========|========keys========|===QPS====| 303 | 2015-12-21 14:17:58 | 101151 | 10805/s | 304 | ========time========|========keys========|===QPS====| 305 | 2015-12-21 14:17:59 | 91715 | 9436/s | 306 | 
========time========|========keys========|===QPS====| 307 | 2015-12-21 14:18:00 | 83253 | 8462/s | 308 | ========time========|========keys========|===QPS====| 309 | 2015-12-21 14:18:01 | 72841 | 10412/s | 310 | ========time========|========keys========|===QPS====| 311 | 2015-12-21 14:18:02 | 62831 | 10010/s | 312 | ========time========|========keys========|===QPS====| 313 | 2015-12-21 14:18:03 | 55254 | 7577/s | 314 | ========time========|========keys========|===QPS====| 315 | 2015-12-21 14:18:04 | 44376 | 10878/s | 316 | ========time========|========keys========|===QPS====| 317 | 2015-12-21 14:18:05 | 33947 | 10429/s | 318 | ========time========|========keys========|===QPS====| 319 | 2015-12-21 14:18:06 | 26726 | 7221/s | 320 | ========time========|========keys========|===QPS====| 321 | 2015-12-21 14:18:07 | 16451 | 10275/s | 322 | ========time========|========keys========|===QPS====| 323 | 2015-12-21 14:18:08 | 5504 | 10947/s | 324 | ========time========|========keys========|===QPS====| 325 | 2015-12-21 14:18:09 | 0 | 5504/s | 326 | ========time========|========keys========|===QPS====| 327 | 2015-12-21 14:18:10 | 0 | NA | 328 | ========time========|========keys========|===QPS====| 329 | 2015-12-21 14:18:11 | 0 | NA | 330 | ========time========|========keys========|===QPS====| 331 | 2015-12-21 14:18:12 | 0 | NA | 332 | 333 | ``` 334 | 335 | 336 | ##异常处理 337 | 338 | 脚本启动后会持续监听日志输入,在没有PHP致命错误、无内存泄露的情况下,该脚本不会中断。 339 | 340 | 即使redis连接断开脚本也不会退出,而是以1秒一次的重连等待redis恢复,所以理论上该脚本不会异常中断。 341 | -------------------------------------------------------------------------------- /agent.php: -------------------------------------------------------------------------------- 1 | 'tcp://127.0.0.1:6379', 8 | ]; 9 | 10 | 11 | (new LogStash())->handler($cfg)->run(); 12 | ?> -------------------------------------------------------------------------------- /init.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | nohup php agent.php 
/**
 * Input mode: read raw log lines, run each one through the configured parser
 * and buffer the results in $this->message for inputAgent() to push to redis.
 *
 * Two sources are supported:
 *  - file mode  ($this->args holds a path): the file is re-opened about once
 *    per second and read from the remembered offset ($this->file_pointer);
 *  - stdin mode ($this->args is empty): lines are read from STDIN until EOF.
 *
 * In both modes blank lines are skipped and lines the parser rejects (falsy
 * return value) are not queued.
 */
function listen(){
    $this->begin = time();
    $parser = $this->config['parser'];

    if($this->args){
        $this->log('begin in file mode' . $this->begin,'debug');
        if(!file_exists($this->args))exit(' file not found');

        while(true){
            $handle = fopen($this->args,'r');
            if(!$handle){
                // File is temporarily gone (e.g. in the middle of a rotation):
                // start from the top once it is back, and wait instead of spinning.
                $this->file_pointer = 0;
                sleep(1);
                continue;
            }

            // A file that is now shorter than the remembered offset was truncated
            // or replaced (logrotate with "create"); read the new file from the start.
            $stat = fstat($handle);
            if($stat !== false && $stat['size'] < (int)$this->file_pointer){
                $this->file_pointer = 0;
            }
            fseek($handle,(int)$this->file_pointer);

            while(($raw = fgets($handle)) !== false){
                if(substr($raw,-1) !== "\n"){
                    // The writer has not finished this line yet. Step back to its
                    // first byte so the complete line is read on a later pass.
                    // (A final line that never receives a newline stays pending.)
                    fseek($handle,-strlen($raw),SEEK_CUR);
                    break;
                }

                $line = trim($raw);
                if($line === ''){
                    continue; // a blank line must not end the read loop
                }

                $log = call_user_func($parser,$line);
                if($log){
                    $this->message[] = $log;
                }
                $this->inputAgent();
            }

            $this->file_pointer = ftell($handle);
            fclose($handle);
            $this->log('listen for in ' . $this->args . ', file pointer ' . $this->file_pointer, 'debug');
            $this->inputAgent();
            sleep(1);
        }
    }else{
        $this->log('begin in tail mode ' . $this->begin,'debug');
        while(($raw = fgets(STDIN)) !== false){
            $line = trim($raw);
            if($line === ''){
                continue;
            }
            $log = call_user_func($parser,$line);
            if($log){
                $this->message[] = $log;
            }
            $this->inputAgent();
        }

        // STDIN is closed. inputAgent() only syncs when its memory or time
        // threshold is exceeded, so backdate the timer to force one last sync
        // of whatever is still buffered.
        $this->begin = 0;
        $this->inputAgent();
    }
}
/**
 * Split the configured redis URL into the pieces the connection code needs.
 *
 * Accepted forms: tcp://host[:port] and tcp://user:password@host[:port].
 * Only the password part of the credentials is used for authentication.
 *
 * @return array keys: 'host' (string|null), 'port' (int, 6379 when the URL
 *               names none), 'auth' (string|null, the URL password)
 */
private function getRedisHost(){
    $parts = parse_url($this->config['redis']);
    if(!is_array($parts)){
        // parse_url() returns false for a seriously malformed URL
        $parts = [];
    }

    return [
        'host' => isset($parts['host']) ? $parts['host'] : null,
        'port' => isset($parts['port']) ? $parts['port'] : 6379,
        // test the key that is actually read: a URL with a user but no
        // password must yield null rather than an undefined-index notice
        'auth' => isset($parts['pass']) ? $parts['pass'] : null,
    ];
}
/**
 * Send one HTTP request to an ElasticSearch node and return the decoded reply.
 *
 * The node comes from getEsHost(); HTTP basic auth is used when that node's
 * URL carries credentials. Transport errors, HTTP codes above 201 and replies
 * whose "errors" field is set (bulk requests answer 200 even when individual
 * items failed) are written to the agent log.
 *
 * @param string $url    request path appended to the node URL, e.g. '/_bulk'
 * @param string $data   request body; no body is sent when empty
 * @param string $method HTTP verb, case-insensitive
 * @return mixed decoded JSON reply (array), or null when there is no reply
 *               or it is not valid JSON
 */
private function esCurl($url,$data='',$method='POST'){
    $cfg = $this->getEsHost();
    $method = strtoupper($method);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $cfg['url'].$url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 300);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $method);

    if($cfg['user'] || $cfg['pass']){
        curl_setopt($ch, CURLOPT_USERPWD, "{$cfg['user']}:{$cfg['pass']}");
    }

    if($data){
        // Without this header curl labels the body as form data, which
        // newer ElasticSearch releases refuse to parse.
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $body = curl_exec($ch);
    $code = curl_getinfo($ch,CURLINFO_HTTP_CODE);
    $error = curl_error($ch);
    curl_close($ch);

    // curl_exec() returns false on transport failure; only decode real replies
    $reply = is_string($body) ? json_decode($body,true) : null;
    $item_errors = is_array($reply) && !empty($reply['errors']);

    if($error || $code > 201 || $item_errors){
        $this->log('ElasticSearch error ' .PHP_EOL.
            'code: ' .$code. PHP_EOL.
            $cfg['url'] .$url .' '. $method .PHP_EOL.
            'Curl error:' . $error .PHP_EOL.
            // a bulk reply lists every item, so cap what goes into the log
            'body: ' .mb_substr((string)$body,0,2000) .PHP_EOL.
            'data: '.mb_substr($data,0,400)
        );
    }

    return $reply;
}
/**
 * Default log parser: turn one JSON access-log line into an array ready to
 * be queued.
 *
 * On top of the decoded fields it
 *  - keeps only the first address when "client" is a comma-separated list and
 *    maps a client of "-" to 127.0.0.1,
 *  - casts "responsetime" to float,
 *  - splits "requesturi" at the first "?" into "resquesturi" (path) and
 *    "args" (query string parsed into an array).
 *
 * @param string $message raw log line
 * @return array|false parsed record, or false when the line is not valid
 *                     JSON or has no timestamp (the line is logged)
 */
private function parser($message){
    // An unquoted field can arrive as a bare - (presumably when nginx has no
    // value for it), which is not valid JSON. Quote it, but only directly
    // after a key and before "," or "}", so string values that merely contain
    // ":-" and negative numbers are left alone.
    $message = preg_replace('/":-(?=\s*[,}])/','":"-"',$message);
    // \xNN byte escapes are not valid JSON escapes either; blank them out
    $message = preg_replace('/\\\x[0-9a-f]{2}/i','?', $message);

    $json = json_decode($message,true);
    if(!is_array($json) || !isset($json['timestamp']) || $json['timestamp'] === ''){
        $this->log('empty timestamp log:'. $message);
        return false;
    }

    // Split at the first "?" only; a URI without a query string yields no args
    $request_uri = isset($json['requesturi']) ? (string)$json['requesturi'] : '';
    $pieces = explode('?',$request_uri,2);
    $request_url = $pieces[0];
    $params = isset($pieces[1]) ? $pieces[1] : '';

    if(isset($json['client'])){
        $hops = explode(',',(string)$json['client']);
        if(count($hops) > 1){
            $json['client'] = trim($hops[0]);
        }elseif($json['client'] === '-'){
            $json['client'] = '127.0.0.1';
        }
    }

    parse_str($params,$args);
    $json['responsetime'] = isset($json['responsetime']) ? floatval($json['responsetime']) : 0.0;
    $json['resquesturi'] = $request_url;
    $json['args'] = $args;
    return $json;
}
$e->getMessage()); 436 | } 437 | } 438 | 439 | $this->begin = time(); //reset begin time 440 | unset($this->message); //reset message count 441 | } 442 | } 443 | 444 | private function indexerAgent(){ 445 | $current_usage = memory_get_usage(); 446 | $sync_second = $this->config['input_sync_second']; 447 | $sync_memory = $this->config['input_sync_memory']; 448 | $time = ($this->begin + $sync_second) - time() ; 449 | if((memory_get_usage() > $sync_memory) or ( $this->begin+$sync_second < time())){ 450 | $row = ''; 451 | if(!empty($this->message)){ 452 | foreach($this->message as $pack){ 453 | $json = json_decode($pack,true); 454 | if(!$json['timestamp']) continue; 455 | $date = date('Y.m.d',strtotime($json['timestamp'])); 456 | $type = $this->config['type']; 457 | $index = $this->config['prefix'].'-'.date('Y.m.d',strtotime($json['timestamp'])); 458 | $row .= json_encode(['create' => ['_index' => $index ,'_type' => $type]])."\n"; 459 | $row .= json_encode($json)."\n"; 460 | } 461 | if(!empty($row)){ 462 | $this->esCurl('/_bulk',$row); 463 | } 464 | } 465 | 466 | 467 | $this->log('count memory > '.$sync_memory.' current:'.$current_usage.' or time > '.$sync_second. 468 | ' current: '.$time.'s ','elasticsearch'); 469 | $this->begin = time(); //reset begin time 470 | unset($this->message,$current_usage,$sync_second,$sync_memory,$time); //reset message count 471 | } 472 | } 473 | 474 | private function default_value(&$arr,$k,$v = ''){ 475 | return $arr[$k] = isset($arr[$k]) ? $arr[$k] : $v; 476 | } 477 | 478 | private function log($msg,$level='warning'){ 479 | if($this->config['log_level'] == 'debug' || ($this->config['log_level'] != 'debug' and $level != 'debug')){ 480 | $message = '['.$level.'] ['.date('Y-m-d H:i:s').'] ['.(memory_get_usage()/1024/1024).'MB] ' .$msg. PHP_EOL; 481 | file_put_contents($this->config['agent_log'], $message, FILE_APPEND); 482 | } 483 | } 484 | } 485 | 486 | --------------------------------------------------------------------------------