├── .gitignore
├── .gitmodules
├── CPU
├── Intel_PAUSE指令变化如何影响MySQL的性能.md
├── Intel_PAUSE指令变化如何影响MySQL的性能
│ ├── 411f164cdedbddcc-image-20221026145848312.png
│ ├── 48c976f989747266-48c976f989747266f9892403794996c0.png
│ ├── 4dbd9dff9deacec0-4dbd9dff9deacec0e9911e3a7d025578.png
│ ├── 678d91ac8db34d0f-image-20221026153813774.png
│ ├── b84245c17e213de5-b84245c17e213de528f2ad8090d504f6.png
│ ├── cd145c494c074e01-cd145c494c074e01e9d2d1d5583a87a0.png
│ ├── e73c1371a02106a5-e73c1371a02106a52f8a13f89a9dd9ad.png
│ ├── eb8dc21830973f58-image-20221026153750159.png
│ ├── ed46d35161ea2835-ed46d35161ea28352acd4289a3e9ddad.png
│ ├── fdb459972926cff3-fdb459972926cff371f5f5ab703790bb.png
│ └── ffd66d9a6098979b-ffd66d9a6098979b555dfb00d3494255.png
├── 十年后数据库还是不敢拥抱NUMA.md
├── 十年后数据库还是不敢拥抱NUMA
│ ├── 1230c2cb6619ba39-uma-architecture.png
│ ├── 19749ce43337a539-image-20210525151622425.png
│ ├── 23453d96e86b616f-1623830161880-c4c74f4d-785e-4274-a579-5d1aa8b5e990.png
│ ├── 2bad7da356c6e69a-1620967573650-b8400c2f-7b48-4502-b7d5-6c050e557126.png
│ ├── 2dc4a58d610355d0-1620954918277-c669bd74-df58-4d69-8185-a93f37046972.png
│ ├── 30c855d972b2e0ed-numa-architecture.png
│ ├── 4a4f5be382df065a-1620956491058-09a1ebc6-c248-41db-9def-67b4f489c4f4.png
│ ├── 689e47eb1c7cf6e7-1620966121309-a264fd7f-fe50-4fc6-940f-4cb603ec7874.png
│ ├── 7bc694ba82e6dc6e-1620977108922-a2f67827-cf00-43a0-bba1-4ba105a33201.png
│ ├── 8c797b537fc7dac4-1620956551990-6e376a3d-de40-4180-a05b-b21a9cbf33bc.png
│ ├── ade76e4ea9b19117-1620953504602-30988926-85d8-4af1-996d-f35aa5fede00.png
│ ├── c841e079b67b1156-39354-figure-3-184398.jpg
│ ├── d653f2b25e16c008-1620956524069-85ec2c06-ff55-48e9-8c26-96e738456ed4.png
│ ├── e05e5ea3036225b2-1620953709047-cbe4b59c-aa2b-4845-8b59-9ed6d07e3916.png
│ ├── edec60f9cad05a92-03-05-Broadwell_HCC_Architecture.svg
│ └── eed601bd96adeeff-image-20210525151537507.png
├── 记一次听风扇声音来定位性能.md
└── 记一次听风扇声音来定位性能
│ ├── 03c3d0990a16b711-image-20220705104403314.png
│ ├── e1f2ff5162e06184-8f04a1f57fe07692327b9269ba484ce4.jpg
│ └── f4c2373e91e8fb20-05-05_DPC_Bandwidth_Impact.svg
├── README.md
├── code
├── BDP1.excalidraw
├── HighUS.java
├── Test.java
├── nop.c
├── pause.c
├── ping.excalidraw
├── send.py
├── spin_lock
│ ├── LockAccumulator.class
│ ├── LockAccumulator.java
│ ├── SpinLockAccumulator.c
│ ├── SpinLockAccumulator.class
│ ├── SpinLockAccumulator.class.pause
│ ├── SpinLockAccumulator.h
│ ├── SpinLockAccumulator.java
│ ├── SpinLockNoPauseAccumulator.class
│ ├── SpinLockNoPauseAccumulator.java
│ └── libpause.so
├── sysbench.excalidraw
├── tcpping
└── timestamp
│ ├── drop_http_by_paws.py
│ ├── format_netstat.sh
│ ├── readme.md
│ ├── rst.py
│ ├── rst_0.py
│ ├── rst_ok.py
│ ├── rst_seq.py
│ ├── rst_ts.py
│ ├── tcp_check_req.bt
│ └── watch_netstat.py
├── network
├── 就是要你懂TCP--半连接队列和全连接队列.md
├── 就是要你懂TCP--半连接队列和全连接队列
│ ├── 01dc036aca4b445e-01dc036aca4b445ed86e3e295bf245b8.png
│ ├── 0c6bbb5d4a10f40c-0c6bbb5d4a10f40c8b3c4ba6cab82292.png
│ ├── 159a331ff8cdd4b8-159a331ff8cdd4b8994dfe6a209d035f.png
│ ├── 2452b0e753f83672-5f63b8e0-952c-47a2-8179-48793034f86b.png
│ ├── 2fbdd05162e9fd51-2fbdd05162e9fd51e803682b8a18cc51.png
│ ├── 3f5f1eeb0646a3af-3f5f1eeb0646a3af8afd6bbff2a9ea0b.png
│ ├── 77ed9ba81f70f794-77ed9ba81f70f7940546f0a22dabf010.png
│ ├── 9179e08ac24ce3d5-9179e08ac24ce3d53e74b92dbd044906.png
│ ├── a5616904df3a5055-a5616904df3a505572d99d557b534db2.png
│ ├── bcf463efeb677d57-bcf463efeb677d5749d8d7571274ee79.png
│ ├── c0849615ae525318-c0849615ae52531887ce6b0313d7d2d1.png
│ └── ec25ccb6cce8f554-ec25ccb6cce8f554b7ef6927f05bd530.png
├── 就是要你懂TCP--性能和发送接收Buffer的关系.md
└── 就是要你懂TCP--性能和发送接收Buffer的关系
│ ├── 028c3cfe690f4f2e-image10-5.png
│ ├── 05d6357ed53c1c16-05d6357ed53c1c16f0dd0454251916ef.png
│ ├── 0db5c3684a931490-0db5c3684a9314907f9158ac15b6ac71.png
│ ├── 0f3050cd98db40a3-0f3050cd98db40a352410a11a521e8b2.png
│ ├── 15b7d6852e44fc17-15b7d6852e44fc179d60d76f322695c7.png
│ ├── 1984258c03009217-1984258c0300921799476777f5f0a38a.png
│ ├── 1de3f2916346e390-1de3f2916346e390be55263d59f5730d.png
│ ├── 2e493d8dc32bb63f-2e493d8dc32bb63f2126375de6675351.png
│ ├── 3d9e77f8c9b0cab1-3d9e77f8c9b0cab1484c870d2c0d2473.png
│ ├── 3dcfd469fe1e2f7e-3dcfd469fe1e2f7e1d938a5289b83826.png
│ ├── 49e2635a7c4025d4-49e2635a7c4025d44b915a1f17dd272a.png
│ ├── 4af4765c045e9eed-4af4765c045e9eed2e36d9760d4a2aba.png
│ ├── 4e2b2e12c754f01a-4e2b2e12c754f01a2f99f9f47dd5fd8e.png
│ ├── 55cf9875d24d76a0-55cf9875d24d76a077c442327d54fa34.png
│ ├── 5ec50ecf25444e96-5ec50ecf25444e96d81fab975b5a79e6.png
│ ├── 67f280a1cf499ae3-67f280a1cf499ae388fc44d6418869a7.png
│ ├── 7ae26e844629258d-7ae26e844629258de173a05d5ad595f9.png
│ ├── b08fb4ce2162927b-b08fb4ce2162927bf9b6ce02cdc64ab0.svg
│ ├── d0e12e8bad876438-d0e12e8bad8764385549f9b391c62ab0.png
│ ├── d188530df31712e8-d188530df31712e8341f5687a960743a.png
│ ├── d385a7dad76ec403-d385a7dad76ec4031dfb6c096bca434b.png
│ ├── d7d3af2c03653e6c-d7d3af2c03653e6cf8ae2befa0022832.png
│ ├── da48878ce0c01bcd-da48878ce0c01bcdedb1e6d6a6cc6d1c.png
│ ├── ea04e40acda98667-ea04e40acda986675bf0ad0ea7b9b8ff.png
│ └── ff025f076a4a2bc2-ff025f076a4a2bc2b1b13d11f32a97d3.png
├── others
└── 【经验分享】Nginx问题排查必备知识.md
├── performance
├── 10+倍性能提升全过程.md
├── 10+倍性能提升全过程
│ ├── 05703c168e63e968-05703c168e63e96821ea9f921d83712b.png
│ ├── 2ae2cb8b0cb324b6-2ae2cb8b0cb324b68ca22c48c019e029.png
│ ├── 2bb7395a2cc6833c-2bb7395a2cc6833c9c7587b38402a301.png
│ ├── 2be2799d1eef982d-2be2799d1eef982d77e5c0a5c896a0e9.png
│ ├── 36ef4b16c3c400ab-36ef4b16c3c400abf6eb7e6b0fbb2f58.png
│ ├── 38bb043c85c7b500-38bb043c85c7b50007609484c7bf5698.png
│ ├── 4c1eff0f925f5997-4c1eff0f925f59977e2557acff5cf03b.png
│ ├── 6b24a854d91aba4d-6b24a854d91aba4dcdbd4f0155683d93.png
│ ├── 6ed62fd6b50ad278-6ed62fd6b50ad2785e5b57687d95ad6e.png
│ ├── 7eb2cbb4afc2c7d7-7eb2cbb4afc2c7d7007c35304c95342a.png
│ ├── 894bd736dd03060e-894bd736dd03060e89e3fa49cc98ae5e.png
│ ├── 8a4a97cb74724b8b-8a4a97cb74724b8baa3b90072a1914e0.png
│ ├── 91353fb9c88116be-91353fb9c88116be3ff109e3528a4651.png
│ ├── 99bf952b880f1724-99bf952b880f17243953da790ff0e710.png
│ ├── afacc681a9550cd0-afacc681a9550cd087838c2383be54c8.png
│ ├── b509b30218dd22e0-b509b30218dd22e03149985cf5e15f8e.png
│ └── fff502ca73e3112e-fff502ca73e3112e585560ffe4a4dbf1.gif
├── Nginx resueport 导致偶发性卡顿.md
├── Nginx reuseport 导致偶发性卡顿
│ ├── arch.jpg
│ ├── benchmark-pkg-cature1.png
│ ├── benchmark-pkg-cature2.png
│ ├── benchmark-pkg-cature3.png
│ ├── benchmark-pkg-cature4.png
│ ├── exp1-pkg-cature1.png
│ ├── exp2-pkg-cature1.png
│ ├── exp2-pkg-cature2.png
│ ├── log-bench
│ │ ├── access.log.txt
│ │ ├── client-runtime.txt
│ │ ├── nginx-case-client.pcap
│ │ ├── pidstat.txt
│ │ └── readme.md
│ ├── log-exp1
│ │ ├── access.log.txt
│ │ ├── client-runtime.txt
│ │ ├── nginx-case-client.pcap
│ │ └── readme.md
│ ├── log-exp2
│ │ ├── access.log.txt
│ │ ├── client-runtime.txt
│ │ ├── nginx-case-client.pcap
│ │ ├── pidstat.txt
│ │ └── readme.md
│ ├── nginx.conf
│ ├── reuseport-explained.jpg
│ └── script
│ │ ├── get_big_file.sh
│ │ └── get_small_file.sh
├── 一次春节大促性能压测不达标的瓶颈推演.md
└── 一次春节大促性能压测不达标的瓶颈推演
│ ├── 0bd20d87d4cbae11-image-20220623003026351.png
│ ├── 2f3b76be63d33151-2f3b76be63d331510eb6f2cecd91747f.png
│ ├── 6a289d1bba1e875d-6a289d1bba1e875d215032b6fdc7b084.png
│ ├── 80374e55936bc36b-80374e55936bc36bbd243f79fcdb5f8d.png
│ ├── 938ce314d19b47cb-938ce314d19b47cba99e2a09c753f606.png
│ ├── a479bad250c03aee-a479bad250c03aee41d58850afab9c14.png
│ └── e239a12a1c361226-e239a12a1c3612263736256c8efc06e4.png
├── tcpdump
├── libmariadb 与 libmysqlclient 连接 AnalyticDB 时配置 local_infile 不同导致连接失败的问题.zip
├── syn_tw_reset.pcap
├── toa.lua
└── toa_pcap
│ ├── tcp_options_252.pcap
│ ├── tcp_options_253.pcap
│ └── tcp_options_254_250.pcap
├── zsxq.png
├── 直播
└── 案例星球20240127直播.pdf
├── 站外案例简析集锦.md
└── 站外案例简析集锦
├── 00e78e6190986e7b-image-20230314100052628.png
├── 0e2a468150a155c8-image-20221125164254479.png
├── 19cfbbd63e749cfe-image-20230314095512464.png
├── 24e56165a9336a1b-image-20230220084137826.png
├── 456dbe574617f911-image-20230220085701693.png
├── 4e503055000530b5-image-20230220084714334.png
├── 52864c4410f483d1-image-20221125164508492.png
├── 64497804a4243f0a-640-20221112211814567.png
├── 6c9333df801d32bd-640-7178470.png
├── 6d82a7d91b6d852d-image-20220924091158877.png
├── 73776fa27bbcb6b2-image-20230314100259875.png
├── 7eb291e891284313-640-20220707152145610.png
├── 853f3166bd6a6741-image-20220924091212645.png
├── 8580d8a8e71510e2-image-20230220083826110.png
├── 96a580f723de3e92-640-8259033.png
├── 979f55f065bb45c9-640-8259052.jpeg
├── c7b6e89872f2a73f-640-7083886.jpeg
├── d5edcf4c833587de-image-20220707154232470.png
├── dbfa3765bf289d2c-image-20220706102446670.png
├── de0ccf287fc93eb7-image-20220707151642981.png
├── e8c9c2961a45aac4-image-20220706103130314.png
├── f1822dfadf71d254-image-20220707151656456.png
├── f26aabaa37d74fad-640-20221112211745593.jpeg
├── fe102ae62b0fb350-image-20230314095644496.png
├── fe98b4a0f5a9a664-image-20230314095237337.png
└── ffd4e48233a458e9-image-20220706113722680.png
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "HieBPF"]
2 | path = HieBPF
3 | url = https://github.com/plantegg/HieBPF.git
4 |
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能.md:
--------------------------------------------------------------------------------
1 |
2 | # Intel PAUSE指令变化如何影响MySQL的性能
3 |
4 | ## 导读
5 |
6 | x86、arm指令都很多,无论是应用程序员还是数据库内核研发大多时候都不需要对这些指令深入理解,但是 Pause 指令和数据库操作太紧密了,本文通过一次非常有趣的性能优化来引入对 Pause 指令的理解,期望可以事半功倍地搞清楚 CPU指令集是如何影响你的程序的。
7 |
8 | 文章分成两大部分,第一部分是 MySQL 集群的一次全表扫描性能优化过程; 第二部分是问题解决后的原理分析以及Pause指令的来龙去脉和优缺点以及应用场景分析。
9 |
10 | ## 业务结构
11 |
12 | 为理解方便做了部分简化:
13 |
14 | client -> Tomcat -> LVS -> MySQL(32 个 MySQLD实例集群,每个实例8Core)
15 |
16 | ## 场景描述
17 |
18 | 通过 client 压 Tomcat 和 MySQL 集群(对数据做分库分表),MySQL 集群是32个实例,每个业务 SQL 都需要经过 Tomcat 拆分成 256 个 SQL 发送给 32 个MySQL(每个MySQL上有8个分库),这 256 条下发给 MySQL 的 SQL 不是完全串行,但也不是完全并行,有一定的并行性。
19 |
20 | 业务 SQL 如下是一个简单的select sum求和,这个 SQL在每个MySQL上都很快(有索引)
21 |
22 | ```
23 | SELECT SUM(emp_arr_amt) FROM table_c WHERE INSUTYPE='310' AND Revs_Flag='Z' AND accrym='201910' AND emp_no='1050457';
24 |
25 | ```
26 |
27 | ## 监控指标说明
28 |
29 | - 后述或者截图中的逻辑RT/QPS是指 client 上看到的Tomcat的 RT 和 QPS;
30 | - RT :response time 请求响应时间,判断性能瓶颈的唯一指标;
31 | - 物理RT/QPS是指Tomcat看到的MySQL RT 和QPS(这里的 RT 是指到达Tomcat节点网卡的 RT ,所以还包含了网络消耗)
32 |
33 | ## 问题描述:
34 |
35 | 通过client压一个Tomcat节点+32个MySQL,QPS大概是430,Tomcat节点CPU跑满,MySQL RT 是0.5ms,增加一个Tomcat节点,QPS大概是700,Tomcat CPU接近跑满,MySQL RT 是0.6ms,到这里性能基本随着扩容线性增加,是符合预期的。
36 |
37 | 继续增加Tomcat节点来横向扩容性能,通过client压三个Tomcat节点+32个MySQL,QPS还是700,Tomcat节点CPU跑不满,MySQL RT 是0.8ms,这就严重不符合预期了。
38 |
39 | 性能压测原则:
40 |
41 | > 加并发QPS不再上升说明到了某个瓶颈,哪个环节RT增加最多瓶颈就在哪里
42 |
43 |
44 | 
45 |
46 | **到这里一切都还是符合我们的经验的,看起来就是 MySQL 有瓶颈(RT 增加明显)。**
47 |
48 | ## 排查 MySQL
49 |
50 | 现场DBA通过监控看到MySQL CPU不到20%,没有慢查询,并且尝试用client越过所有中间环节直接压其中一个MySQL,可以将 MySQL CPU 跑满,这时的QPS大概是38000(对应上面的场景client QPS为700的时候,单个MySQL上的QPS才跑到6000) 所以排除了MySQL的嫌疑(这个推理不够严谨为后面排查埋下了大坑)。
51 |
52 | 那么接下来的嫌疑在网络、LVS 等中间环节上。
53 |
54 | ## LVS和网络的嫌疑
55 |
56 | 首先通过大查询排除了带宽的问题,因为这里都是小包,pps到了72万,很自然想到了网关、LVS的限流之类的
57 |
58 | pps监控,这台物理机上有4个MySQL实例,pps 9万左右,9*32/4=72万
59 | 
60 |
61 | …………(省略巨长的分析、拉人、扯皮过程)
62 |
63 | 最终所有网络因素都被排除,核心证据是:做压测的时候反复从 Tomcat 上 ping 后面的MySQL,RT 跟没有压力的时候一样,也说明了网络没有问题(请思考这个 ping 的作用)。
64 |
65 | ## 问题的确认
66 |
67 | 尝试在Tomcat上打开日志,并将慢 SQL 阈值设置为100ms,这个时候确实能从日志中看到大量MySQL上的慢查询,因为这个SQL需要在Tomcat上做拆分成256个SQL,同时下发,一旦有一个SQL返回慢,整个请求就因为这个短板被拖累了。平均 RT 0.8ms,但是经常有超过100ms的话对整体影响还是很大的。
68 |
69 | 将Tomcat记录下来的慢查询(Tomcat增加了一个唯一id下发给MySQL)到MySQL日志中查找,果然发现MySQL上确实慢了,所以到这里基本确认是MySQL的问题,终于不用再纠结是否是网络问题了。
70 |
71 | 同时在Tomcat进行抓包,对网卡上的 RT 进行统计分析:
72 |
73 | 
74 |
75 | 上图是Tomcat上抓到的每个sql的物理RT 平均值,上面是QPS 430的时候, RT 0.6ms,下面是3个server,QPS为700,但是 RT 上升到了0.9ms,基本跟Tomcat监控记录到的物理RT一致。如果MySQL上也有类似抓包计算 RT 时间的话可以快速排除网络问题。
76 |
77 | 网络抓包得到的 RT 数据更容易被所有人接受。尝试过在MySQL上抓包,但是因为LVS模块的原因,进出端口、ip都被修改过,所以没法分析一个流的响应时间。
78 |
79 | ## 重心再次转向MySQL
80 |
81 | 这个时候因为问题点基本确认,再去查看MySQL是否有问题的重心都不一样了,不再只是看看CPU和慢查询,这个问题明显更复杂一些。
82 |
83 | > 教训:CPU只是影响性能的一个因素,RT 才是结果,要追着 RT 跑,而不是只看 CPU
84 |
85 |
86 | 通过监控发现MySQL CPU虽然一直不高,但是经常看到running thread飙到100多,很快又降下去了,看起来像是突发性的并发查询请求太多导致了排队等待,每个MySQL实例是8Core的CPU,尝试将MySQL实例扩容到16Core(只是为了验证这个问题),QPS确实可以上升到1000(没有到达理想的1400)。
87 |
88 | 这是Tomcat上监控到的MySQL状态:
89 | 
90 |
91 | 同时在MySQL机器上通过vmstat也可以看到这种飙升:
92 | 
93 |
94 | 以上分析可以清晰看到虽然 MySQL 整体压力不大,但是似乎会偶尔来一波卡顿、running 任务飙升。
95 |
96 | 像这种短暂突发性的并发流量似乎监控都很难看到(基本都被平均掉了),只有一些实时性监控偶尔会采集到这种短暂突发性飙升,这也导致了一开始忽视了MySQL。
97 |
98 | 所以接下来的核心问题就是MySQL为什么会有这种飙升、这种飙升的影响到底是什么?
99 |
100 | ## perf top
101 |
102 | 直接用 perf 看下 MySQLD 进程,发现 ut_delay 高得不符合逻辑:
103 |
104 | 
105 |
106 | 展开看一下,基本是在优化器中做索引命中行数的选择:
107 |
108 |
109 |
110 | 跟直接在 MySQL 命令行中通过 show processlist看到的基本一致:
111 |
112 |
113 |
114 | 这是 MySQL 的优化器在对索引进行统计,统计的时候要加锁,thread running 抖动的时候通过 show processlist 看到很多 thread处于 statistics 状态。也就是高并发下加锁导致 CPU 压不上去,同时 RT 剧烈增加。
115 |
116 | 这里ut_delay 消耗了 28% 的 CPU 肯定太不正常了,于是将 innodb_spin_wait_delay 从 30 改成 6 后性能立即上去了,继续增加 Tomcat 节点,QPS也可以线性增加。
117 |
118 | > 耗CPU最高的调用函数栈是…`mutex_spin_wait`->`ut_delay`,属于锁等待的逻辑。InnoDB在这里用的是自旋锁,锁等待是通过调用 ut_delay 让 CPU做空循环在等锁的时候不释放CPU从而避免上下文切换,会消耗比较高的CPU。
119 |
120 |
121 | ## 最终的性能
122 |
123 | 调整参数 innodb_spin_wait_delay=6 后在4个Tomcat节点下,并发40时,QPS跑到了1700,物理RT:0.7ms,逻辑RT:19.6ms,cpu:90%,这个时候只需要继续扩容 Tomcat 节点的数量就可以增加QPS
124 | 
125 |
126 | 再跟调整前比较一下,innodb_spin_wait_delay=30,并发40时,QPS 500+,物理RT:2.6ms 逻辑RT:72.1ms cpu:37%
127 | 
128 |
129 | 再看看调整前压测的时候的vmstat和tsar --cpu,可以看到process running抖动明显
130 | 
131 |
132 | 对比修改delay后的process running就很稳定了,即使QPS大了3倍
133 | 
134 |
135 | ## 事后思考和分析
136 |
137 | 到这里问题得到了完美解决,但是不禁要问为什么?ut_delay 是怎么工作的? 和 innodb_spin_wait_delay 以及自旋锁的关系?
138 |
139 | ## 原理解析
140 |
141 | 既然调整 innodb_spin_wait_delay 就能解决这个问题,那就要先分析一下 innodb_spin_wait_delay 的作用
142 |
143 | ### 关于 innodb_spin_wait_delay
144 |
145 | innodb通过大量的自旋锁(比如 `InnoDB` [mutexes](https://dev.mysql.com/doc/refman/5.7/en/glossary.html#glos_mutex) and [rw-locks](https://dev.mysql.com/doc/refman/5.7/en/glossary.html#glos_rw_lock))来用高CPU消耗避免上下文切换,这是自旋锁的正确使用方式,在多核场景下,它们一起自旋抢同一个锁,容易造成[cache ping-pong](https://stackoverflow.com/questions/30684974/are-cache-line-ping-pong-and-false-sharing-the-same),进而多个CPU核之间会互相使对方缓存部分无效。所以这里[innodb通过增加 innodb_spin_wait_delay 和 Pause 配合来缓解cache ping-pong](https://dev.mysql.com/doc/refman/5.7/en/innodb-performance-spin_lock_polling.html),也就是本来通过CPU 高速自旋抢锁,换成了抢锁失败后 delay一下(Pause)但是不释放CPU,delay 时间到后继续抢锁,也就是把连续的自旋抢锁转换成了更稀疏的点状的抢锁(间隔的 delay是个随机数),这样不但避免了上下文切换也大大减少了cache ping-pong。
146 |
147 | ### 自旋锁如何减少了cache ping-pong
148 |
149 | 多线程竞争锁的时候,加锁失败的线程会“忙等待”,直到它拿到锁。什么叫“忙等待”呢?它并不意味着一直执行 CAS 函数,而是会与 CPU 紧密配合 ,它通过 CPU 提供的 `PAUSE` 指令,减少循环等待时的cache ping-pong和耗电量;对于单核 CPU,忙等待并没有意义,此时它会主动把线程休眠。
150 |
151 | ### X86 PAUSE 指令
152 |
153 | X86设计了Pause指令,也就是调用 Pause 指令的代码会抢着 CPU 不释放,但是CPU 会打个盹,比如 10个时钟周期,相对一次上下文切换是大几千个时钟周期。
154 |
155 | 这样应用一旦自旋抢锁失败可以先 Pause 一下,只是这个Pause 时间对于 MySQL 来说还不够久,所以需要增加参数 innodb_spin_wait_delay 来将休息时间放大一些。
156 |
157 | 在我们的这个场景下对每个 SQL的 RT 抖动非常敏感(放大256倍),所以过高的 delay 会导致部分SQL RT 变高。
158 |
159 | 函数 ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)) 用来执行这个delay:
160 |
161 | ```
162 | /***************************MySQL代码****************************//**
163 | Runs an idle loop on CPU. The argument gives the desired delay
164 | in microseconds on 100 MHz Pentium + Visual C++.
165 | @return dummy value */
166 | UNIV_INTERN
167 | ulint
168 | ut_delay(ulint delay) //delay 是[0,innodb_spin_wait_delay)之间的一个随机数
169 | {
170 | ulint i, j;
171 |
172 |
173 | UT_LOW_PRIORITY_CPU();
174 |
175 | j = 0;
176 |
177 | for (i = 0; i < delay * 50; i++) { //delay 放大50倍
178 | j += i;
179 | UT_RELAX_CPU(); //调用 CPU Pause
180 | }
181 |
182 | UT_RESUME_PRIORITY_CPU();
183 |
184 | return(j);
185 | }
186 | ```
187 | innodb_spin_wait_delay的默认值为6. spin 等待延迟是一个动态全局参数,您可以在MySQL选项文件(my.cnf或my.ini)中指定该参数,或者在运行时使用SET GLOBAL 来修改。在我们的MySQL配置中默认改成了30,导致了这个问题。
188 |
189 | ### CPU 为什么要有Pause
190 |
191 | 首先可以看到 Pause 指令的作用:
192 |
193 | - 避免上下文切换,应用层想要休息可能会用yield、sleep,这两操作对于CPU来说太重了(伴随上下文切换)
194 | - 能给超线程腾出计算能力(HT共享核,但是有单独的寄存器等存储单元,CPU Pause的时候,对应的HT可以占用计算资源),比如同一个core上先跑多个Pause,同时再跑 nop 指令,这时 nop指令的 IPC基本不受Pause的影响
195 | - 节能(CPU可以休息、但是不让出来),CPU Pause 的时候你从 top 能看到 CPU 100%,但是不耗能。
196 |
197 | 所以有了 Pause 指令后能够提高超线程的利用率,节能,减少上下文切换提高自旋锁的效率。
198 |
199 | > [The PAUSE instruction is first introduced](https://www.reddit.com/r/intel/comments/hogk2n/research_on_the_impact_of_intel_Pause_instruction/) for Intel Pentium 4 processor to improve the performance of “spin-wait loop”. The PAUSE instruction is typically used with software threads executing on two logical processors located in the same processor core, waiting for a lock to be released. Such short wait loops tend to last between tens and a few hundreds of cycles. When the wait loop is expected to last for thousands of cycles or more, it is preferable to yield to the operating system by calling one of the OS synchronization API functions, such as WaitForSingleObject on Windows OS.
200 | >
201 | > An Intel® processor suffers a severe performance penalty when exiting the loop because it detects a possible memory order violation. The PAUSE instruction provides a hint to the processor that the code sequence is a spin-wait loop. The processor uses this hint to avoid the memory order violation in most situations. The PAUSE instruction can improve the performance of the processors supporting Intel Hyper-Threading Technology when executing “spin-wait loops”. With Pause instruction, processors are able to avoid the memory order violation and pipeline flush, and reduce power consumption through pipeline stall.
202 |
203 |
204 | **从intel sdm手册以及实际测试验证来看,Pause 指令在执行过程中,基本不占用流水线执行资源。**
205 |
206 | ### Skylake 架构的8163 和 Broadwell架构 E5-2682 CPU型号的不同
207 |
208 | 为什么用得好好的 innodb_spin_wait_delay 参数这次就不行了呢?
209 |
210 | 这是因为以前业务一直使用的是 E5-2682 CPU,这次用的是新一代架构的 Skylake 8163,那这两款CPU在这里的核心差别是?
211 |
212 | 在Intel 64-ia-32-architectures-optimization-manual手册中提到:
213 |
214 | > The latency of the PAUSE instruction in prior generation microarchitectures is about 10 cycles, whereas in Skylake microarchitecture it has been extended to as many as 140 cycles.
215 | >
216 | > [The PAUSE instruction can improve the performance](https://xem.github.io/minix86/manual/intel-x86-and-64-manual-vol3/o_fe12b1e2a880e0ce-302.html) of processors supporting Intel Hyper-Threading Technology when executing “spin-wait loops” and other routines where one thread is accessing a shared lock or semaphore in a tight polling loop. When executing a spin-wait loop, the processor can suffer a severe performance penalty when exiting the loop because it detects a possible memory order violation and flushes the core processor’s pipeline. The PAUSE instruction provides a hint to the processor that the code sequence is a spin-wait loop. The processor uses this hint to avoid the memory order violation and prevent the pipeline flush. In addition, the PAUSE instruction
217 | > de-pipelines the spin-wait loop to prevent it from consuming execution resources excessively and consuming power needlessly. (See [Section 8.10.6.1, “Use the PAUSE Instruction in Spin-Wait Loops,”](https://xem.github.io/minix86/manual/intel-x86-and-64-manual-vol3/o_fe12b1e2a880e0ce-305.html) for more information about using the PAUSE instruction with IA-32 processors supporting Intel Hyper-Threading Technology.)
218 |
219 |
220 | 也就是**Skylake架构的CPU的PAUSE指令从之前的10 cycles 改成了 140 cycles**。这可是14倍的变化呀。
221 |
222 | MySQL 使用 innodb_spin_wait_delay 控制 spin lock等待时间,等待时间从 `0*50` 个Pause到 `innodb_spin_wait_delay*50` 个Pause。
223 | 以前 innodb_spin_wait_delay 默认配置30,对于E5-2682 CPU,等待的最长时间为:
224 | 30 * 50 * 10=15000 cycles,对于2.5GHz的CPU,等待时间为6us。
225 | 对应计算 Skylake CPU的等待时间:30 *50 *140=210000 cycles,CPU主频也是2.5GHz,等待时间84us。
226 |
227 | E5-2682 CPU型号在不同的delay参数和不同并发压力下的写入性能数据:
228 |
229 | 
230 |
231 | Skylake 8163 CPU型号在不同的delay参数和不同并发压力下的写入性能数据:
232 |
233 | 
234 |
235 | ==因为8163的cycles从10改到了140,所以可以看到delay参数对性能的影响更加陡峻。==
236 |
237 | ## 总结分析
238 |
239 | Intel CPU 架构不同使得 Pause 指令的CPU Cycles不同导致了 MySQL innodb_spin_wait_delay 在 spin lock 失败的时候(此时需要 Pause* innodb_spin_wait_delay*N)delay更久,使得调用方看到了MySQL更大的 RT ,进而导致 Tomcat Server上业务并发跑不起来,所以最终压力上不去。
240 |
241 | 在长链路的排查中,细化定位是哪个节点出了问题是最难的,要盯住 RT 而不是 CPU。
242 |
243 | 欲速则不达,做压测的时候还是要老老实实地从一个并发开始观察QPS、 RT ,然后一直增加压力到压不上去了,再看QPS、 RT 变化,然后确认瓶颈点。
244 |
245 | ## 参考文章
246 |
247 | https://cloud.tencent.com/developer/article/1005284
248 |
249 | [mysql doc](https://dev.mysql.com/doc/refman/5.7/en/innodb-performance-spin_lock_polling.html)
250 |
251 | [Cache Line 伪共享发现与优化](http://oliveryang.net/2018/01/cache-false-sharing-debug)
252 |
253 | [intel spec](https://en.wikichip.org/w/images/e/eb/intel-ref-248966-037.pdf)
254 |
255 | [Intel PAUSE指令变化影响到MySQL的性能,该如何解决?](https://mp.weixin.qq.com/s/dlKC13i9Z8wjDDiU2tig6Q)
256 |
257 | [ARM软硬件协同设计:锁优化](https://topic.atatech.org/articles/173194), arm不同于x86,用的是yield来代替Pause
258 |
259 | http://cr.openjdk.java.net/~dchuyko/8186670/yield/spinwait.html
260 |
261 |
262 |
263 | Reference:
264 |
265 |
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/411f164cdedbddcc-image-20221026145848312.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/411f164cdedbddcc-image-20221026145848312.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/48c976f989747266-48c976f989747266f9892403794996c0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/48c976f989747266-48c976f989747266f9892403794996c0.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/4dbd9dff9deacec0-4dbd9dff9deacec0e9911e3a7d025578.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/4dbd9dff9deacec0-4dbd9dff9deacec0e9911e3a7d025578.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/678d91ac8db34d0f-image-20221026153813774.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/678d91ac8db34d0f-image-20221026153813774.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/b84245c17e213de5-b84245c17e213de528f2ad8090d504f6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/b84245c17e213de5-b84245c17e213de528f2ad8090d504f6.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/cd145c494c074e01-cd145c494c074e01e9d2d1d5583a87a0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/cd145c494c074e01-cd145c494c074e01e9d2d1d5583a87a0.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/e73c1371a02106a5-e73c1371a02106a52f8a13f89a9dd9ad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/e73c1371a02106a5-e73c1371a02106a52f8a13f89a9dd9ad.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/eb8dc21830973f58-image-20221026153750159.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/eb8dc21830973f58-image-20221026153750159.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/ed46d35161ea2835-ed46d35161ea28352acd4289a3e9ddad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/ed46d35161ea2835-ed46d35161ea28352acd4289a3e9ddad.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/fdb459972926cff3-fdb459972926cff371f5f5ab703790bb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/fdb459972926cff3-fdb459972926cff371f5f5ab703790bb.png
--------------------------------------------------------------------------------
/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/ffd66d9a6098979b-ffd66d9a6098979b555dfb00d3494255.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/Intel_PAUSE指令变化如何影响MySQL的性能/ffd66d9a6098979b-ffd66d9a6098979b555dfb00d3494255.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/1230c2cb6619ba39-uma-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/1230c2cb6619ba39-uma-architecture.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/19749ce43337a539-image-20210525151622425.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/19749ce43337a539-image-20210525151622425.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/23453d96e86b616f-1623830161880-c4c74f4d-785e-4274-a579-5d1aa8b5e990.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/23453d96e86b616f-1623830161880-c4c74f4d-785e-4274-a579-5d1aa8b5e990.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/2bad7da356c6e69a-1620967573650-b8400c2f-7b48-4502-b7d5-6c050e557126.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/2bad7da356c6e69a-1620967573650-b8400c2f-7b48-4502-b7d5-6c050e557126.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/2dc4a58d610355d0-1620954918277-c669bd74-df58-4d69-8185-a93f37046972.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/2dc4a58d610355d0-1620954918277-c669bd74-df58-4d69-8185-a93f37046972.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/30c855d972b2e0ed-numa-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/30c855d972b2e0ed-numa-architecture.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/4a4f5be382df065a-1620956491058-09a1ebc6-c248-41db-9def-67b4f489c4f4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/4a4f5be382df065a-1620956491058-09a1ebc6-c248-41db-9def-67b4f489c4f4.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/689e47eb1c7cf6e7-1620966121309-a264fd7f-fe50-4fc6-940f-4cb603ec7874.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/689e47eb1c7cf6e7-1620966121309-a264fd7f-fe50-4fc6-940f-4cb603ec7874.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/7bc694ba82e6dc6e-1620977108922-a2f67827-cf00-43a0-bba1-4ba105a33201.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/7bc694ba82e6dc6e-1620977108922-a2f67827-cf00-43a0-bba1-4ba105a33201.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/8c797b537fc7dac4-1620956551990-6e376a3d-de40-4180-a05b-b21a9cbf33bc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/8c797b537fc7dac4-1620956551990-6e376a3d-de40-4180-a05b-b21a9cbf33bc.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/ade76e4ea9b19117-1620953504602-30988926-85d8-4af1-996d-f35aa5fede00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/ade76e4ea9b19117-1620953504602-30988926-85d8-4af1-996d-f35aa5fede00.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/c841e079b67b1156-39354-figure-3-184398.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/c841e079b67b1156-39354-figure-3-184398.jpg
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/d653f2b25e16c008-1620956524069-85ec2c06-ff55-48e9-8c26-96e738456ed4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/d653f2b25e16c008-1620956524069-85ec2c06-ff55-48e9-8c26-96e738456ed4.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/e05e5ea3036225b2-1620953709047-cbe4b59c-aa2b-4845-8b59-9ed6d07e3916.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/e05e5ea3036225b2-1620953709047-cbe4b59c-aa2b-4845-8b59-9ed6d07e3916.png
--------------------------------------------------------------------------------
/CPU/十年后数据库还是不敢拥抱NUMA/eed601bd96adeeff-image-20210525151537507.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/十年后数据库还是不敢拥抱NUMA/eed601bd96adeeff-image-20210525151537507.png
--------------------------------------------------------------------------------
/CPU/记一次听风扇声音来定位性能.md:
--------------------------------------------------------------------------------
1 |
2 | # 记一次听风扇声音来定位性能瓶颈
3 |
4 | ## 背景
5 |
6 | 在一次POC测试过程中,测试机构提供了两台Intel压力机来压我们的集群
7 |
8 | - 压力机1:两路共72core intel 5XXX系列 CPU,主频2.2GHz, 128G内存
9 | - 压力机2:四路共196core intel 8XXX系列 CPU,主频2.5GHz, 256G内存 (8系列比5系列 CPU的性能要好、要贵)
10 |
11 | 从CPU硬件指标来看压力机2都是碾压压力机1,但是实际测试是压力机2只能跑到接近压力机1的能力,两台机器CPU基本都跑满,并且都是压测进程消耗了90%以上的CPU,内核态消耗不到5%CPU
12 |
13 | 所以接下来需要在调试我们集群性能前先把测试机优化好,才能把压力打上来。
14 |
15 | ## 分析
16 |
17 | 测试机构提供的机器上没有任何工具来评估CPU性能,也无法安装,只能靠耳朵:**仔细听会发现196core机器的CPU风扇声音反而更小,说明196core的CPU出工不出力,大概是流水线在频繁地Stall**(不管你信不信反正我是信的)
18 |
19 | 进一步分析,首先看到 业务消耗了90%以上的CPU,内核态消耗不到5%CPU,两台机器都是这样,这说明 196core 只跑出了 72core的水平,一定是CPU效率出了问题,top看到的CPU占用率不完全是全力在运算,其实cpu 流水线stall也是占用CPU的。
20 |
21 | 这个分析理论请参考我的文章[《Perf IPC以及CPU性能》](https://plantegg.github.io/2021/05/16/Perf%20IPC%E4%BB%A5%E5%8F%8ACPU%E5%88%A9%E7%94%A8%E7%8E%87/)
22 |
23 | ## 验证
24 |
25 | 通过stream测试读写内存的带宽和时延,得到如下数据:
26 |
27 | 72core机器, 本路时延1.1,跨路时延1.4,因为是2路所以有50%的概率跨路,性能下降30%
28 |
29 | 196core机器,本路时延1.2,跨路时延1.85,因为是4路所以有75%的概率跨路,性能下降50%
30 |
31 | 从以上测试数据可以明显看到虽然196core机器拥有更强的单核能力以及更多的核数,但是因为访问内存太慢严重拖累了CPU运算能力,导致大部分时间CPU都在等待内存,这里CPU和内存的速度差了2个数量级,所以内存延时才是整体的瓶颈。
32 |
33 | 测试数据和方法请参考我的文章[《AMD Zen CPU 架构以及不同CPU性能大PK》](https://plantegg.github.io/2021/06/18/%E5%87%A0%E6%AC%BECPU%E6%80%A7%E8%83%BD%E5%AF%B9%E6%AF%94/)
34 |
35 | 有了这个数据心里非常有底问题在哪里了,但是还要想清楚怎么解释给测试机构他们才会信服,因为第一次解释他们直接说不可能,怎么会196core打不过72core呢,再说从来没有集群是测试机构196core压力机打不满的,这台压力机用了几年从来没有人说过这个问题 :(
36 |
37 | ## 内存信息
38 |
39 | 接下来需要拿到更详细的硬件信息来说服测试机构了。
40 |
41 | 通过dmidecode 获取两台机器内存的速度,分别是2100(196core) VS 2900(72core),同时系统也吐出了内存延时分别是 0.5ns VS 0.3 ns,这两个时间对比很直观,普通人也能看懂。
42 |
43 | ```
44 | //以下硬件信息是从家里机器上获取,并非测试机构提供的机器,测试机构提供的机器不让拍照和采集
45 | #dmidecode -t memory
46 | # dmidecode 3.2
47 | Getting SMBIOS data from sysfs.
48 | SMBIOS 3.2.1 present.
49 | # SMBIOS implementations newer than version 3.2.0 are not
50 | # fully supported by this version of dmidecode.
51 |
52 | Handle 0x0033, DMI type 16, 23 bytes
53 | Physical Memory Array
54 | Location: System Board Or Motherboard
55 | Use: System Memory
56 | Error Correction Type: Multi-bit ECC
57 | Maximum Capacity: 2 TB //最大支持2T
58 | Error Information Handle: 0x0032
59 | Number Of Devices: 32 //32个插槽
60 |
61 | Handle 0x0041, DMI type 17, 84 bytes
62 | Memory Device
63 | Array Handle: 0x0033
64 | Error Information Handle: 0x0040
65 | Total Width: 72 bits
66 | Data Width: 64 bits
67 | Size: 32 GB
68 | Form Factor: DIMM
69 | Set: None
70 | Locator: CPU0_DIMMA0
71 | Bank Locator: P0 CHANNEL A
72 | Type: DDR4
73 | Type Detail: Synchronous Registered (Buffered)
74 | Speed: 2933 MT/s //DIMM 内存条支持的最大速度
75 | Manufacturer: SK Hynix
76 | Serial Number: 220F9EC0
77 | Asset Tag: Not Specified
78 | Part Number: HMAA4GR7AJR8N-WM
79 | Rank: 2
80 | Configured Memory Speed: 2100 MT/s //内存实际运行速度
81 | Minimum Voltage: 1.2 V
82 | Maximum Voltage: 1.2 V
83 | Configured Voltage: 1.2 V
84 | Memory Technology: DRAM
85 | Memory Operating Mode Capability: Volatile memory
86 | Module Manufacturer ID: Bank 1, Hex 0xAD
87 | Non-Volatile Size: None
88 | Volatile Size: 32 GB
89 |
90 | #lshw
91 | *-bank:19 //主板插槽槽位
92 | description: DIMM DDR4 Synchronous Registered (Buffered) 2933 MHz (0.3 ns)
93 | product: HMAA4GR7AJR8N-WM
94 | vendor: SK Hynix
95 | physical id: 13
96 | serial: 220F9F63
97 | slot: CPU1_DIMMB0
98 | size: 32GiB //实际所插内存大小
99 | width: 64 bits
100 | clock: 2933MHz (0.3ns)
101 | ```
102 |
103 | > In `dmidecode`’s output for memory, “Speed” is the highest speed supported by the DIMM, as determined by [JEDEC](https://en.wikipedia.org/wiki/JEDEC) SPD information. “Configured Clock Speed” is the speed at which it is currently running (as set up during boot).
104 |
105 |
106 | DIMM(Dual In-line Memory Module,双列直插式存储模块):内存条印刷电路板的正反两面均有金手指与主板上的内存插槽接触,这种结构被称为DIMM。于是内存条也有人叫DIMM条,主板上的内存槽也有人称为DIMM槽。
107 |
108 | DIMM 代表物理上的一根内存条,下图中三根内存条共享一个channel连到 CPU
109 |
110 | 
111 |
112 | 
113 |
114 | 
115 |
116 | ## 最终的运行方案
117 |
118 | 给196core的机器换上新的2933 MHz (0.3 ns)的内存条,速度一下子就上去了。
119 |
120 | 然后在196core的机器上起4个压力进程,每个进程分担25%的压力,避免跨路访问内存导致时延从1.2上升到1.85;实际测试也表明,只用196core中的48core和用全部196core的性能是一样的,所以这里一定要起多个进程并做内存亲和性绑定,才能充分使用全部196core。
121 |
122 | **最终整机196core机器的打压能力达到了原来的3.6倍左右。**
123 |
124 | ## 总结
125 |
126 | 程序员要保护好听力,关键时刻可能会用上 :)
127 |
128 | 你说196core机器用了这么强的CPU但是为什么搭配那么差的内存以及主板,我也不知道,大概是有人拿回扣吧。
129 |
130 | ## 参考资料
131 |
132 | [NUMA DEEP DIVE PART 4: LOCAL MEMORY OPTIMIZATION](https://frankdenneman.nl/2016/07/13/numa-deep-dive-4-local-memory-optimization/)
133 |
134 |
--------------------------------------------------------------------------------
/CPU/记一次听风扇声音来定位性能/03c3d0990a16b711-image-20220705104403314.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/记一次听风扇声音来定位性能/03c3d0990a16b711-image-20220705104403314.png
--------------------------------------------------------------------------------
/CPU/记一次听风扇声音来定位性能/e1f2ff5162e06184-8f04a1f57fe07692327b9269ba484ce4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/CPU/记一次听风扇声音来定位性能/e1f2ff5162e06184-8f04a1f57fe07692327b9269ba484ce4.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # programmer_case
2 | 程序员案例集锦
3 |
4 | 拍案惊奇
5 |
6 | 案例分析集锦,要求案例典型普适性强,代表基础组件基本原理等知识。分析手段尽量通用,重现容易的更好,分析过程一定要逻辑合理每个疑问都能回答清晰。有没有想要贡献案例的同学?这种案例搞清楚一个基本能横扫一个领域,比如有一个 Nginx 卡顿案例就让我这个从没用过Nginx的人学会了 惊群、epoll条件触发等之类的知识点 #拍案惊奇# 案例首先会去掉敏感信息,然后在分享过的同学之间内部共享,然后再开放。如果你们在网上看过已经发布过的案例更好,我先去学习下
7 |
8 | 学习的时候希望链路尽可能简单,所以不断将问题简化,但是应用的时候链路会非常复杂,在理解问题后又要将问题还原到尽可能复杂的链路中来分析,也就是在复杂场景掩盖下考验自己还能化繁为简学以致用吗?
9 |
10 | 案例整理出来真的很牛逼,首先是分析过于清楚,看完可以平趟这个领域;其次在一个案例后再带3/5个相关小案例可以帮你丰富场景,多角度理解
11 |
12 | 作者twitter: [@plantegg](https://twitter.com/plantegg)
13 |
14 | 或者[来知识星球找我](https://t.zsxq.com/0cqPpX2xQ): https://t.zsxq.com/0cqPpX2xQ
15 |
16 |
17 |
18 |
19 | ### 提交案例
20 |
21 | 支持md格式,图片可以并列新建一个同名文件夹,存放在里面。
22 |
23 | 私信联系 @plantegg 添加提交权限
24 |
--------------------------------------------------------------------------------
/code/HighUS.java:
--------------------------------------------------------------------------------
1 |
2 |
3 | import java.lang.*;
4 | import java.util.ArrayList;
5 | import java.lang.management.*;
6 |
7 | public class HighUS {
8 | public static void main(String[] args) throws Exception{
9 | HighUS us = new HighUS();
10 | us.run();
11 | }
12 |
13 | private void run() throws Exception{
14 | int count = java.lang.Runtime.getRuntime().availableProcessors();
15 |
16 | for(int i=0; i list = new ArrayList();
35 | for(int k=0; k<10000; ++k){
36 | list.add(str+String.valueOf(k));
37 | }
38 | list.contains("AXXXA");
39 | }
40 | }
41 | }
42 |
43 | class NotConsumeCPUTask implements Runnable{
44 |
45 | public void run(){
46 |
47 | while(true){
48 | try{
49 | Thread.sleep(1000);
50 | }catch(InterruptedException e){
51 | e.printStackTrace();
52 | }
53 | }
54 | }
55 | }
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/code/Test.java:
--------------------------------------------------------------------------------
1 | import java.sql.Connection;
2 | import java.sql.DriverManager;
3 | import java.sql.ResultSet;
4 | import java.sql.SQLException;
5 | import java.sql.Statement;
6 | import java.sql.PreparedStatement;
7 | /**
8 |  * Scratch JDBC client used for verification experiments. The style is deliberately
9 |  * throwaway so the program can be tweaked quickly (original author's note).
10 |  *
11 |  * Arguments: url user password sql interval. The sql text must contain one "?"
12 |  * placeholder; interval is bound to it as a string and is also parsed as the
13 |  * number of milliseconds to sleep before disconnecting.
14 |  */
15 | public class Test {
16 |     public static void main(String args[]) throws NumberFormatException, InterruptedException, ClassNotFoundException {
17 |         if (args.length < 5) {
18 |             // Fail with a readable message instead of ArrayIndexOutOfBoundsException.
19 |             System.err.println("Usage: java Test <url> <user> <password> <sql> <interval>");
20 |             return;
21 |         }
22 |         Class.forName("com.mysql.jdbc.Driver");
23 |         String url = args[0];
24 |         String user = args[1];
25 |         String pass = args[2];
26 |         String sql = args[3];
27 |         String interval = args[4];
28 |         // try-with-resources closes the connection, statements and result sets
29 |         // even when a query throws or the sleep is interrupted.
30 |         try (Connection conn = DriverManager.getConnection(url, user, pass)) {
31 |             while (true) {
32 |                 // First pass: run the query and close it without reading any row.
33 |                 try (PreparedStatement stmt = conn.prepareStatement(sql)) {
34 |                     //stmt.setFetchSize(Integer.MIN_VALUE); // turns on streaming reads, but then every SQL is preceded by "set net_write_timeout=600" sent to the server
35 |                     stmt.setString(1, interval);
36 |                     try (ResultSet rs = stmt.executeQuery()) {
37 |                         // intentionally left unread
38 |                     }
39 |                 }
40 |
41 |                 // Second pass: run the same query and drain every row.
42 |                 try (PreparedStatement stmt2 = conn.prepareStatement(sql)) {
43 |                     stmt2.setString(1, interval);
44 |                     try (ResultSet rs = stmt2.executeQuery()) {
45 |                         while (rs.next()) {
46 |                             System.out.println("fine");
47 |                         }
48 |                     }
49 |                 }
50 |
51 |                 Thread.sleep(Long.valueOf(interval));
52 |                 break; // single iteration; drop this line to keep looping
53 |             }
54 |         } catch (SQLException e) {
55 |             e.printStackTrace();
56 |         }
57 |     }
58 | }
59 |
--------------------------------------------------------------------------------
/code/nop.c:
--------------------------------------------------------------------------------
1 |
2 | /*
3 |  * Spin forever executing nothing but NOP instructions.
4 |  * Each loop iteration issues 128 back-to-back NOPs followed by the loop branch.
5 |  */
6 | int main(void) {
7 |
8 |     while (1) {
9 |         /* .rept/.endr makes the assembler emit the enclosed line 128 times,
10 |          * i.e. the same instruction stream as spelling out 128 "nop" lines. */
11 |         __asm__ (".rept 128\n\t"
12 |                  "nop\n\t"
13 |                  ".endr");
14 |     }
15 | }
16 |
--------------------------------------------------------------------------------
/code/pause.c:
--------------------------------------------------------------------------------
1 |
2 | /*
3 |  * Spin forever executing nothing but PAUSE instructions.
4 |  * Each loop iteration issues 128 back-to-back PAUSEs followed by the loop branch.
5 |  */
6 | int main(void) {
7 |
8 |     while (1) {
9 |         /* .rept/.endr makes the assembler emit the enclosed line 128 times,
10 |          * i.e. the same instruction stream as spelling out 128 "pause" lines. */
11 |         __asm__ (".rept 128\n\t"
12 |                  "pause\n\t"
13 |                  ".endr");
14 |     }
15 | }
16 |
--------------------------------------------------------------------------------
/code/send.py:
--------------------------------------------------------------------------------
1 | from scapy.all import *
2 | import time
3 | import sys
4 |
5 | target_ip = "server_host_ip"
6 | target_port = 22345
7 | src_port=random.randint(1024,65535)
8 | #src_port=12345
9 | init_seq=4294967292
10 |
11 | ip = IP(dst=target_ip)
12 | syn = TCP(sport=src_port, dport=target_port, flags="S", seq=init_seq)
13 | syn_ack = sr1(ip / syn)
14 | if syn_ack and TCP in syn_ack and syn_ack[TCP].flags == "SA":
15 | print("Received SYN-ACK")
16 | ack = TCP(sport=src_port, dport=target_port,
17 | flags="A", seq=syn_ack.ack, ack=syn_ack.seq+1)
18 | print(syn_ack.seq)
19 | print(syn_ack.ack)
20 | print(ack)
21 | send(ip/ack)
22 | print("Send ACK")
23 | else:
24 | print("Failed to establish TCP connection")
25 |
26 | print("send payload")
27 | data="rrr"
28 | payload=TCP(sport=src_port, dport=22345,flags="AP", seq=syn_ack.ack, ack=syn_ack.seq+1)
29 | payload2=TCP(sport=src_port, dport=22345,flags="AP", seq=0, ack=syn_ack.seq+1)
30 | syn_ack=send(ip/payload/Raw(load=data))
31 | syn_ack=send(ip/payload2/Raw(load=data))
32 |
--------------------------------------------------------------------------------
/code/spin_lock/LockAccumulator.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/code/spin_lock/LockAccumulator.class
--------------------------------------------------------------------------------
/code/spin_lock/LockAccumulator.java:
--------------------------------------------------------------------------------
1 | public class LockAccumulator {
2 | private int sum = 0; // 变量用于累加
3 | private final Object lock = new Object(); // 自选的锁对象
4 |
5 | public static void main(String[] args) {
6 | if (args.length < 2) {
7 | System.out.println("请提供两个参数:线程数量和循环次数");
8 | return;
9 | }
10 |
11 | int numThreads = Integer.parseInt(args[0]);
12 | int totalIncrements = Integer.parseInt(args[1]);
13 |
14 | LockAccumulator accumulator = new LockAccumulator();
15 |
16 | Thread[] threads = new Thread[numThreads];
17 | long start = System.currentTimeMillis();
18 |
19 | for (int i = 0; i < numThreads; i++) {
20 | threads[i] = new Thread(() -> {
21 | int incrementsPerThread = totalIncrements / numThreads;
22 | for (int j = 0; j < incrementsPerThread; j++) {
23 | accumulator.add();
24 | }
25 | });
26 | threads[i].start();
27 | }
28 |
29 | // 等待所有线程完成
30 | for (int i = 0; i < numThreads; i++) {
31 | try {
32 | threads[i].join();
33 | } catch (InterruptedException e) {
34 | e.printStackTrace();
35 | }
36 | }
37 | long end = System.currentTimeMillis();
38 | System.out.println("累加结果: " + accumulator.getSum() + " and time:" +(end-start));
39 | }
40 |
41 | public void add() {
42 | synchronized (lock) {
43 | sum++;
44 | }
45 | }
46 |
47 | public int getSum() {
48 | synchronized (lock) {
49 | return sum;
50 | }
51 | }
52 | }
53 |
54 |
--------------------------------------------------------------------------------
/code/spin_lock/SpinLockAccumulator.c:
--------------------------------------------------------------------------------
1 | // SpinLockAccumulator.c
2 | #include <jni.h>
3 | #include "SpinLockAccumulator.h" // This header will be generated by the `javah` tool
4 |
5 | // Native body of SpinLockAccumulator.pauseInstruction(): issues 50 PAUSE instructions per call.
6 | JNIEXPORT void JNICALL Java_SpinLockAccumulator_pauseInstruction(JNIEnv *env, jclass cls) {
7 |     int i=0;
8 |     for(; i<50; ++i)
9 |         // Inline assembly for the "pause" instruction, which is often used in spin-wait loops
10 |         __asm__ __volatile__ ("pause" ::: "memory");
11 | }
12 |
--------------------------------------------------------------------------------
/code/spin_lock/SpinLockAccumulator.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/code/spin_lock/SpinLockAccumulator.class
--------------------------------------------------------------------------------
/code/spin_lock/SpinLockAccumulator.class.pause:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/code/spin_lock/SpinLockAccumulator.class.pause
--------------------------------------------------------------------------------
/code/spin_lock/SpinLockAccumulator.h:
--------------------------------------------------------------------------------
1 | /* DO NOT EDIT THIS FILE - it is machine generated */
2 | #include <jni.h>
3 | /* Header for class SpinLockAccumulator */
4 |
5 | #ifndef _Included_SpinLockAccumulator
6 | #define _Included_SpinLockAccumulator
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 | /*
11 |  * Class:     SpinLockAccumulator
12 |  * Method:    pauseInstruction
13 |  * Signature: ()V
14 |  */
15 | JNIEXPORT void JNICALL Java_SpinLockAccumulator_pauseInstruction
16 |   (JNIEnv *, jclass);
17 |
18 | #ifdef __cplusplus
19 | }
20 | #endif
21 | #endif
22 |
--------------------------------------------------------------------------------
/code/spin_lock/SpinLockAccumulator.java:
--------------------------------------------------------------------------------
1 | import java.util.concurrent.atomic.AtomicBoolean;
2 | import java.util.concurrent.atomic.AtomicInteger;
3 |
4 | public class SpinLockAccumulator {
5 | private AtomicInteger sum = new AtomicInteger(0);
6 | private AtomicBoolean lock = new AtomicBoolean(false);
7 |
8 | // Declare the native method
9 | private static native void pauseInstruction();
10 |
11 | // Load the native library containing the implementation of pauseInstruction
12 | static {
13 | System.loadLibrary("pause");
14 | }
15 |
16 | public static void main(String[] args) throws Exception {
17 | if (args.length < 2) {
18 | System.out.println("请提供两个参数:线程数量和循环次数");
19 | return;
20 | }
21 |
22 | int numThreads = Integer.parseInt(args[0]);
23 | int totalIncrements = Integer.parseInt(args[1]);
24 |
25 | SpinLockAccumulator accumulator = new SpinLockAccumulator();
26 |
27 | Thread[] threads = new Thread[numThreads];
28 | long startTime = System.currentTimeMillis(); // 记录开始时间
29 |
30 | for (int i = 0; i < numThreads; i++) {
31 | threads[i] = new Thread(() -> {
32 | int incrementsPerThread = totalIncrements / numThreads +
33 | (totalIncrements % numThreads == 0 ? 0 : 1);
34 | for (int j = 0; j < incrementsPerThread; j++) {
35 | try{
36 | accumulator.add(); }
37 | catch(Exception e){}
38 |
39 | }
40 | });
41 | threads[i].start();
42 | }
43 |
44 | // 等待所有线程完成
45 | for (int i = 0; i < numThreads; i++) {
46 | try {
47 | threads[i].join();
48 | } catch (InterruptedException e) {
49 | e.printStackTrace();
50 | }
51 | }
52 |
53 | long endTime = System.currentTimeMillis(); // 记录结束时间
54 | long totalTime = endTime - startTime; // 计算总耗时
55 |
56 | System.out.println("累加结果: " + accumulator.getSum());
57 | System.out.println("操作耗时: " + totalTime + " 毫秒");
58 | }
59 |
60 | public void add() throws Exception {
61 | while (true) {
62 | if (lock.compareAndSet(false, true)) {
63 | try {
64 | sum.incrementAndGet();
65 | } finally {
66 | lock.set(false);
67 | }
68 | break;
69 | }
70 | // 在实际项目中,可能需要在此处添加Thread.yield()或者Thread.sleep(1)
71 | // 以避免过度消耗CPU资源。
72 | //pauseInstruction();
73 | Thread.sleep(1);
74 | }
75 | }
76 |
77 | public int getSum() {
78 | return sum.get();
79 | }
80 | }
81 |
82 |
--------------------------------------------------------------------------------
/code/spin_lock/SpinLockNoPauseAccumulator.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/code/spin_lock/SpinLockNoPauseAccumulator.class
--------------------------------------------------------------------------------
/code/spin_lock/SpinLockNoPauseAccumulator.java:
--------------------------------------------------------------------------------
1 | import java.util.concurrent.atomic.AtomicBoolean;
2 | import java.util.concurrent.atomic.AtomicInteger;
3 |
4 | public class SpinLockNoPauseAccumulator {
5 | private AtomicInteger sum = new AtomicInteger(0);
6 | private AtomicBoolean lock = new AtomicBoolean(false);
7 |
8 | // Declare the native method
9 | private static native void pauseInstruction();
10 |
11 | // Load the native library containing the implementation of pauseInstruction
12 | static {
13 | System.loadLibrary("pause");
14 | }
15 |
16 | public static void main(String[] args) {
17 | if (args.length < 2) {
18 | System.out.println("请提供两个参数:线程数量和循环次数");
19 | return;
20 | }
21 |
22 | int numThreads = Integer.parseInt(args[0]);
23 | int totalIncrements = Integer.parseInt(args[1]);
24 |
25 | SpinLockNoPauseAccumulator accumulator = new SpinLockNoPauseAccumulator();
26 |
27 | Thread[] threads = new Thread[numThreads];
28 | long startTime = System.currentTimeMillis(); // 记录开始时间
29 |
30 | for (int i = 0; i < numThreads; i++) {
31 | threads[i] = new Thread(() -> {
32 | int incrementsPerThread = totalIncrements / numThreads +
33 | (totalIncrements % numThreads == 0 ? 0 : 1);
34 | for (int j = 0; j < incrementsPerThread; j++) {
35 | accumulator.add();
36 | }
37 | });
38 | threads[i].start();
39 | }
40 |
41 | // 等待所有线程完成
42 | for (int i = 0; i < numThreads; i++) {
43 | try {
44 | threads[i].join();
45 | } catch (InterruptedException e) {
46 | e.printStackTrace();
47 | }
48 | }
49 |
50 | long endTime = System.currentTimeMillis(); // 记录结束时间
51 | long totalTime = endTime - startTime; // 计算总耗时
52 |
53 | System.out.println("累加结果: " + accumulator.getSum());
54 | System.out.println("操作耗时: " + totalTime + " 毫秒");
55 | }
56 |
57 | public void add() {
58 | while (true) {
59 | if (lock.compareAndSet(false, true)) {
60 | try {
61 | sum.incrementAndGet();
62 | } finally {
63 | lock.set(false);
64 | }
65 | break;
66 | }
67 | // 在实际项目中,可能需要在此处添加Thread.yield()或者Thread.sleep(1)
68 | // 以避免过度消耗CPU资源。
69 | //pauseInstruction();
70 | }
71 | }
72 |
73 | public int getSum() {
74 | return sum.get();
75 | }
76 | }
77 |
78 |
--------------------------------------------------------------------------------
/code/spin_lock/libpause.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/code/spin_lock/libpause.so
--------------------------------------------------------------------------------
/code/tcpping:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # tcpping: test response times using TCP SYN packets
4 | # URL: http://www.vdberg.org/~richard/tcpping.html
5 | #
6 | # uses tcptraceroute from http://michael.toren.net/code/tcptraceroute/
7 | #
8 | # (c) 2002-2005 Richard van den Berg under the GPL
9 | # http://www.gnu.org/copyleft/gpl.html
10 | #
11 | # 2002/12/20 v1.0 initial version
12 | # 2003/01/25 v1.1 added -c and -r options
13 | # now accepting all other tcptraceroute options
14 | # 2003/01/30 v1.2 removed double quotes around backquotes
15 | # 2003/03/25 v1.3 added -x option, courtesy of Alvin Austin
16 | # 2005/03/31 v1.4 added -C option, courtesy of Norman Rasmussen
17 | # 2007/01/11 v1.5 catch bad destination addresses
18 | # 2007/01/19 v1.6 catch non-root tcptraceroute
19 | # 2008/02/10 v1.7 make -C work when reverse lookup fails, courtesy of Fabrice Le Dorze
20 |
21 |
22 | ver="v1.7"
23 | format="%Y%m%d%H%M%S"
24 | d="no"
25 | c="no"
26 | C="no"
27 | ttl=255
28 | seq=0
29 | q=1
30 | r=1
31 | w=3
32 | topt=""; x=""  # topt: options passed through to tcptraceroute; x: -x repeat count (empty = repeat forever)
33 |
34 | usage () {
35 | name=`basename $0`
36 | echo "tcpping $ver Richard van den Berg "
37 | echo
38 | echo "Usage: $name [-d] [-c] [-C] [-w sec] [-q num] [-x count] ipaddress [port]"
39 | echo
40 | echo " -d print timestamp before every result"
41 | echo " -c print a columned result line"
42 | echo " -C print in the same format as fping's -C option"
43 | echo " -w wait time in seconds (defaults to 3)"
44 | echo " -r repeat every n seconds (defaults to 1)"
45 | echo " -x repeat n times (defaults to unlimited)"
46 | echo
47 | echo "See also: man tcptraceroute"
48 | echo
49 | }
50 |
51 | _checksite() {
52 | ttr=`tcptraceroute -f ${ttl} -m ${ttl} -q ${q} -w ${w} $* 2>&1`
53 | if echo "${ttr}" | egrep -i "(bad destination|got roo)" >/dev/null 2>&1; then
54 | echo "${ttr}"
55 | exit
56 | fi
57 | }
58 |
59 | _testsite() {
60 | myseq="${1}"
61 | shift
62 | [ "${c}" = "yes" ] && nows=`date +${format}`
63 | [ "${d}" = "yes" ] && nowd=`date`
64 | ttr=`tcptraceroute -f ${ttl} -m ${ttl} -q ${q} -w ${w} $* 2>/dev/null`
65 | host=`echo "${ttr}" | awk '{print $2 " " $3}'`
66 | rtt=`echo "${ttr}" | sed 's/.*] //' | awk '{print $1}'`
67 | not=`echo "${rtt}" | tr -d ".0123456789"`
68 | [ "${d}" = "yes" ] && echo "$nowd"
69 | if [ "${c}" = "yes" ]; then
70 | if [ "x${rtt}" != "x" -a "x${not}" = "x" ]; then
71 | echo "$myseq $nows $rtt $host"
72 | else
73 | echo "$myseq $nows $max $host"
74 | fi
75 | elif [ "${C}" = "yes" ]; then
76 | if [ "$myseq" = "0" ]; then
77 | echo -n "$1 :"
78 | fi
79 | if [ "x${rtt}" != "x" -a "x${not}" = "x" ]; then
80 | echo -n " $rtt"
81 | else
82 | echo -n " -"
83 | fi
84 | if [ "$x" = "1" ]; then
85 | echo
86 | fi
87 | else
88 | echo "${ttr}" | sed -e "s/^.*\*.*$/seq $myseq: no response (timeout)/" -e "s/^$ttl /seq $myseq: tcp response from/"
89 | fi
90 | # echo "${ttr}"
91 | }
92 |
93 | while getopts dhq:w:cr:nNFSAEi:f:l:m:p:s:x:C opt ; do
94 | case "$opt" in
95 | d|c|C) eval $opt="yes" ;;
96 | q|w|r|x) eval $opt="$OPTARG" ;;
97 | n|N|F|S|A|E) topt="$topt -$opt" ;;
98 | i|l|p|s) topt="$topt -$opt $OPTARG" ;;
99 | f|m) ttl="$OPTARG" ;;
100 | ?) usage; exit ;;
101 | esac
102 | done
103 |
104 | shift `expr $OPTIND - 1`
105 |
106 | if [ "x$1" = "x" ]; then
107 | usage
108 | exit
109 | fi
110 |
111 | max=`echo "${w} * 1000" | bc`
112 |
113 | if [ `date +%s` != "%s" ]; then
114 | format="%s"
115 | fi
116 |
117 | _checksite ${topt} $*
118 |
119 | if [ "$x" = "" ]; then
120 | while [ 1 ] ; do
121 | _testsite ${seq} ${topt} $* &
122 | pid=$!
123 | if [ "${C}" = "yes" ]; then
124 | wait $pid
125 | fi
126 | seq=`expr $seq + 1`
127 | sleep ${r}
128 | done
129 | else
130 | while [ "$x" -gt 0 ] ; do
131 | _testsite ${seq} ${topt} $* &
132 | pid=$!
133 | if [ "${C}" = "yes" ]; then
134 | wait $pid
135 | fi
136 | seq=`expr $seq + 1`
137 | x=`expr $x - 1`
138 | if [ "$x" -gt 0 ]; then
139 | sleep ${r}
140 | fi
141 | done
142 | fi
143 |
144 | exit
145 |
146 |
--------------------------------------------------------------------------------
/code/timestamp/drop_http_by_paws.py:
--------------------------------------------------------------------------------
1 | from scapy.all import *
2 |
3 | # 目标 IP 和端口
4 | target_ip = "gf"
5 | target_port = 8000
6 |
7 | # 构建 HTTP GET 请求
8 | http_request = (
9 | "GET / HTTP/1.1\r\n"
10 | f"Host: {target_ip}:{target_port}\r\n"
11 | "User-Agent: Mozilla/5.0\r\n"
12 | "Accept: */*\r\n"
13 | "Connection: close\r\n"
14 | "\r\n"
15 | )
16 | seq_num=1234567
17 |
18 | # 创建 IP 层
19 | ip = IP(dst=target_ip)
20 |
21 | # 创建 TCP 层,设置 SYN
22 | tcp = TCP(sport=RandShort(), dport=target_port, flags="S", seq=seq_num, options=[('MSS', 1460), ('NOP', None), ('NOP', None), ('Timestamp', (int(time.time()), 0))])
23 |
24 | # 发送 SYN 包并接收响应
25 | syn_ack = sr1(ip/tcp, timeout=10)
26 |
27 | if syn_ack is None:
28 | print("No response received")
29 | exit()
30 |
31 | # 发送 ACK
32 | tcp_ack = TCP(sport=syn_ack[TCP].dport,
33 | dport=target_port,
34 | flags="A",
35 | seq=syn_ack[TCP].ack,
36 | ack=syn_ack[TCP].seq + 1)
37 |
38 | # 发送 HTTP GET 请求
39 | tcp_push = TCP(sport=syn_ack[TCP].dport,
40 | dport=target_port,
41 | flags="PA",
42 | seq=syn_ack[TCP].ack,
43 | ack=syn_ack[TCP].seq + 1,
44 | options=[('MSS', 1460), ('NOP', None), ('NOP', None), ('Timestamp', (0, 0))])
45 |
46 | # 发送请求并接收响应
47 | response = sr1(ip/tcp_push/http_request, timeout=10)
48 |
49 | if response:
50 | # 打印响应内容
51 | if Raw in response:
52 | print(response[Raw].load.decode())
53 | else:
54 | print("No response received")
55 |
56 | # 发送 FIN 包关闭连接
57 | fin = ip/TCP(sport=syn_ack[TCP].dport,
58 | dport=target_port,
59 | flags="FA",
60 | seq=syn_ack[TCP].ack,
61 | ack=syn_ack[TCP].seq + 1)
62 | send(fin)
63 |
64 |
--------------------------------------------------------------------------------
/code/timestamp/format_netstat.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # 将输入保存到临时文件
4 | head -2 $1 > temp_input.txt
5 |
6 | # 读取第一行的键名
7 | keys=$(head -n 1 temp_input.txt | cut -d' ' -f2-)
8 | # 读取第二行的值
9 | values=$(tail -n 1 temp_input.txt | cut -d' ' -f2-)
10 |
11 | # 将键名和值转换为数组
12 | IFS=' ' read -r -a key_array <<< "$keys"
13 | IFS=' ' read -r -a value_array <<< "$values"
14 |
15 | # 遍历数组并打印对应关系
16 | for i in "${!key_array[@]}"; do
17 | echo "${key_array[i]}: ${value_array[i]}"
18 | done
19 |
20 | # 删除临时文件
21 | rm temp_input.txt
22 |
23 |
--------------------------------------------------------------------------------
/code/timestamp/readme.md:
--------------------------------------------------------------------------------
1 | 对应的[星球文章](https://articles.zsxq.com/id_c8wn3hq6ub1x.html)
2 |
3 |
4 | 重现 timestamp 不递增导致 RST 被丢弃(需要安装 scapy 见星球文章)
5 | python3 rst_ts.py
6 |
7 | 可以监控 /proc/net/netstat 中的指标变化
8 | python3 watch_netstat.py
9 |
--------------------------------------------------------------------------------
/code/timestamp/rst.py:
--------------------------------------------------------------------------------
1 | from scapy.all import *
2 | import time
3 |
4 | # 目标服务器信息
5 | target_ip = "172.26.137.130"
6 | target_port = 8000
7 |
8 | # 源端口(可以随机选择)
9 | source_port = 12345
10 |
11 | # 初始序列号(可以随机生成)
12 | seq_num = RandNum(0, 2**32-1)
13 |
def perform_handshake():
    """Send a SYN to the target, then an RST / ACK / RST sequence.

    After a reply to the SYN arrives, the following packets are sent in order:
      1. an RST carrying only two NOP options (no Timestamp option),
      2. after a 0.1 s pause, an ACK carrying a Timestamp option,
      3. an RST with no TCP options at all.

    Uses the module-level target_ip, target_port, source_port and seq_num.

    Returns:
        None in every case. "No response received" is printed when sr1()
        returns no reply to the SYN.
    """
    ip = IP(dst=target_ip)
    syn = TCP(sport=source_port,
              dport=target_port,
              flags='S',
              seq=seq_num,
              options=[('MSS', 1460), ('NOP', None), ('NOP', None),
                       ('Timestamp', (int(time.time()), 0))])

    # Send the SYN and wait for the reply.
    syn_ack = sr1(ip/syn)

    if syn_ack is None:
        print("No response received")
        return None

    # RST without a Timestamp option (NOP padding only).
    rst = TCP(sport=source_port,
              dport=target_port,
              flags='R',
              seq=syn_ack.ack,
              ack=syn_ack.seq + 1,
              options=[('NOP', None), ('NOP', None)])

    # ACK that does carry a Timestamp option.
    ack = TCP(sport=source_port,
              dport=target_port,
              flags='A',
              seq=syn_ack.ack,
              ack=syn_ack.seq + 1,
              options=[('NOP', None), ('NOP', None),
                       ('Timestamp', (int(time.time()), 0))])

    send(ip/rst)
    # Go through the explicitly imported `time` module. A bare `sleep` is not
    # imported by this file and would only resolve if the scapy star-import
    # happened to leak that name.
    time.sleep(0.1)
    send(ip/ack)

    # Final RST with no TCP options.
    rst = TCP(sport=source_port,
              dport=target_port,
              flags='R',
              seq=syn_ack.ack,
              ack=syn_ack.seq + 1)

    send(ip/rst)
65 |
66 | if __name__ == "__main__":
67 | # 执行三次握手并发送 RST
68 | perform_handshake()
69 |
70 |
--------------------------------------------------------------------------------
/code/timestamp/rst_0.py:
--------------------------------------------------------------------------------
1 | from scapy.all import *
2 | import random
3 |
4 | # 目标服务器信
5 | target_ip = "gf"
6 | target_port = 8000
7 |
8 | # 源端口使用随机端口
9 | source_port = random.randint(1024, 65535)
10 | #source_port = 12345
11 |
12 | # 初始序列号
13 | seq_num = 12345
14 | #seq_num = random.randint(0, 4294967295)
15 |
def send_syn():
    """Send a SYN carrying a Timestamp option and return whatever sr1() yields.

    The packet is built from the module-level target_ip, target_port,
    source_port and seq_num. The Timestamp option is (current epoch seconds, 0).
    """
    destination = IP(dst=target_ip)
    timestamp_option = ('Timestamp', (int(time.time()), 0))

    syn_segment = TCP(
        sport=source_port,
        dport=target_port,
        flags='S',
        seq=seq_num,
        options=[timestamp_option],
    )

    # sr1() sends the packet and hands back the first answer it receives.
    return sr1(destination / syn_segment)
27 |
def send_rst(syn_ack_packet):
    """Send an RST whose Timestamp option is (0, <peer timestamp>).

    Args:
        syn_ack_packet: the reply to the SYN. Its TCP options are scanned for
            the first 'Timestamp' entry, whose first value is echoed back as
            the second value of the RST's Timestamp option (0 if none found).

    The RST's sequence number is the acknowledgement number of the reply.
    """
    # Echo value: first Timestamp option found in the reply, else 0.
    echoed = next(
        (opt[1][0] for opt in syn_ack_packet[TCP].options if opt[0] == 'Timestamp'),
        0,
    )

    rst_segment = TCP(
        sport=source_port,
        dport=target_port,
        flags='R',
        seq=syn_ack_packet[TCP].ack,
        options=[('Timestamp', (0, echoed))],
    )

    send(IP(dst=target_ip) / rst_segment)
59 |
def send_ack(syn_ack_packet):
    """Send the ACK that answers the given SYN+ACK.

    Args:
        syn_ack_packet: the reply to the SYN. Its TCP ack number becomes this
            segment's sequence number and its TCP seq + 1 becomes the
            acknowledgement number.

    The ACK carries a Timestamp option of (current epoch seconds, 0).
    """
    peer_tcp = syn_ack_packet[TCP]
    ack_kwargs = {
        'sport': source_port,
        'dport': target_port,
        'flags': 'A',
        'seq': peer_tcp.ack,
        'ack': peer_tcp.seq + 1,
        'options': [('Timestamp', (int(time.time()), 0))],
    }

    send(IP(dst=target_ip) / TCP(**ack_kwargs))
75 |
def main():
    """Run the SYN -> RST -> ACK experiment and report progress on stdout.

    Sends a SYN via send_syn(). Only when the reply is a TCP segment with both
    SYN and ACK set does it continue with send_rst() and then send_ack().
    Any exception is caught and printed rather than propagated.
    """
    try:
        # Send the SYN and wait for the reply.
        print("Sending SYN...")
        syn_ack = send_syn()

        # Require BOTH SYN (0x02) and ACK (0x10). A bare `flags & 0x12` is
        # truthy when either bit is set, so an RST+ACK reply (0x14), for
        # example, would have been mistaken for a SYN+ACK.
        if syn_ack and TCP in syn_ack and (int(syn_ack[TCP].flags) & 0x12) == 0x12:
            print("Received SYN+ACK")

            print("Sending RST with timestamp=0...")
            send_rst(syn_ack)

            print("Sending ACK...")
            send_ack(syn_ack)

            print("Sequence completed")
        else:
            print("Did not receive proper SYN+ACK")

    except Exception as e:
        print(f"Error occurred: {e}")
99 |
100 | if __name__ == "__main__":
101 | main()
102 |
103 |
--------------------------------------------------------------------------------
/code/timestamp/rst_ok.py:
--------------------------------------------------------------------------------
1 | from scapy.all import *
2 | import time
3 |
4 | # 目标服务器信息
5 | target_ip = "172.26.137.130"
6 | target_port = 8000
7 |
8 | # 源端口(可以随机选择)
9 | #source_port = 22345
10 | source_port = random.randint(1024, 2048)
11 |
12 | # 初始序列号(可以随机生成)
13 | seq_num = RandNum(0, 2**32-1)
14 |
def perform_handshake():
    """Send a SYN and, once a reply arrives, answer with an ACK followed by an RST.

    The ACK carries a Timestamp option of (current epoch seconds, 0); the RST
    carries a Timestamp option of (0, 0). Both reuse the reply's ack number as
    their sequence number and the reply's seq + 1 as acknowledgement number.

    Uses the module-level target_ip, target_port, source_port and seq_num.

    Returns:
        None in every case. "No response received" is printed when sr1()
        returns no reply to the SYN.
    """
    nop_padding = [('NOP', None), ('NOP', None)]

    ip = IP(dst=target_ip)
    syn_options = [('MSS', 1460)] + nop_padding + [('Timestamp', (int(time.time()), 0))]
    syn = TCP(sport=source_port, dport=target_port, flags='S',
              seq=seq_num, options=syn_options)

    reply = sr1(ip/syn)
    if reply is None:
        print("No response received")
        return None

    our_seq = reply.ack
    peer_next = reply.seq + 1

    ack = TCP(sport=source_port, dport=target_port, flags='A',
              seq=our_seq, ack=peer_next,
              options=nop_padding + [('Timestamp', (int(time.time()), 0))])

    rst = TCP(sport=source_port, dport=target_port, flags='R',
              seq=our_seq, ack=peer_next,
              options=nop_padding + [('Timestamp', (0, 0))])

    # Order matters for the experiment: ACK first, then RST.
    for segment in (ack, rst):
        send(ip/segment)
57 |
58 | if __name__ == "__main__":
59 | # 执行三次握手并发送 RST
60 | perform_handshake()
61 |
62 |
--------------------------------------------------------------------------------
/code/timestamp/rst_seq.py:
--------------------------------------------------------------------------------
1 | from scapy.all import *
2 | import random
3 |
4 | # 目标服务器信
5 | target_ip = "gf"
6 | target_port = 8000
7 |
8 | # 源端口使用随机端口
9 | source_port = random.randint(1024, 65535)
10 | #source_port = 12345
11 |
12 | # 初始序列号
13 | seq_num = 12345
14 | #seq_num = random.randint(0, 4294967295)
15 |
def send_syn():
    """Emit the opening SYN (with a Timestamp option) and return sr1()'s result.

    Reads target_ip, target_port, source_port and seq_num from module scope.
    The Timestamp option sent is (current epoch seconds, 0).
    """
    ip_layer = IP(dst=target_ip)
    options = [('Timestamp', (int(time.time()), 0))]
    tcp_layer = TCP(sport=source_port, dport=target_port,
                    flags='S', seq=seq_num, options=options)

    # Send and wait for the first answer.
    reply = sr1(ip_layer / tcp_layer)
    return reply
27 |
def send_rst(syn_ack_packet):
    """Send an RST with sequence number 1 and a current-time Timestamp option.

    Args:
        syn_ack_packet: the reply to the SYN. Its TCP options are scanned for
            the first 'Timestamp' entry, whose first value is echoed back as
            the second value of the RST's Timestamp option (0 if none found).

    The sequence number is the literal 1 rather than the reply's ack number.
    """
    # Echo value: first Timestamp option found in the reply, else 0.
    peer_ts = 0
    for name, value in ((opt[0], opt[1]) for opt in syn_ack_packet[TCP].options):
        if name == 'Timestamp':
            peer_ts = value[0]
            break

    rst_options = [('Timestamp', (int(time.time()), peer_ts))]

    rst_segment = TCP(
        sport=source_port,
        dport=target_port,
        flags='R',
        seq=1,
        options=rst_options,
    )

    send(IP(dst=target_ip) / rst_segment)
63 |
def send_ack(syn_ack_packet):
    """Send an option-less ACK with the fixed sequence number 12346.

    Args:
        syn_ack_packet: the reply to the SYN; its TCP seq + 1 is used as the
            acknowledgement number.

    12346 is the module-level seq_num (12345) plus one, written as a literal.
    """
    destination = IP(dst=target_ip)
    # Built but not attached to the packet; kept so the options can be switched
    # back on for comparison runs.
    tcp_options = [('Timestamp', (int(time.time()), 0))]

    ack_segment = TCP(sport=source_port,
                      dport=target_port,
                      flags='A',
                      seq=12346,
                      ack=syn_ack_packet[TCP].seq + 1)

    send(destination / ack_segment)
80 |
def main():
    """Run the SYN -> RST -> (0.1 s pause) -> ACK experiment.

    Sends a SYN via send_syn(). Only when the reply is a TCP segment with both
    SYN and ACK set does it continue with send_rst(), a 0.1 s sleep, and
    send_ack(). Any exception is caught and printed rather than propagated.
    """
    try:
        # Send the SYN and wait for the reply.
        print("Sending SYN...")
        syn_ack = send_syn()

        # Require BOTH SYN (0x02) and ACK (0x10). A bare `flags & 0x12` is
        # truthy when either bit is set, so an RST+ACK reply (0x14), for
        # example, would have been mistaken for a SYN+ACK.
        if syn_ack and TCP in syn_ack and (int(syn_ack[TCP].flags) & 0x12) == 0x12:
            print("Received SYN+ACK")

            print("Sending RST with timestamp=0...")
            send_rst(syn_ack)

            # Pause between the RST and the ACK.
            time.sleep(0.1)
            print("Sending ACK...")
            send_ack(syn_ack)

            print("Sequence completed")
        else:
            print("Did not receive proper SYN+ACK")

    except Exception as e:
        print(f"Error occurred: {e}")
105 |
106 | if __name__ == "__main__":
107 | main()
108 |
109 |
--------------------------------------------------------------------------------
/code/timestamp/rst_ts.py:
--------------------------------------------------------------------------------
1 | from scapy.all import *
2 | import random
3 |
4 | # 目标服务器信, 需要修改为实际 ip
5 | target_ip = "127.0."
6 | target_port = 8000
7 |
8 | # 源端口使用随机端口
9 | source_port = random.randint(1024, 65535)
10 | #source_port = 12345
11 |
12 | # 初始序列号
13 | seq_num = 12345
14 | #seq_num = random.randint(0, 4294967295)
15 |
def send_syn():
    """Send the initial SYN with a Timestamp option and return the sr1() reply.

    All addressing comes from module scope (target_ip, target_port,
    source_port, seq_num). The Timestamp option is (current epoch seconds, 0).
    """
    syn_fields = dict(
        sport=source_port,
        dport=target_port,
        flags='S',
        seq=seq_num,
        options=[('Timestamp', (int(time.time()), 0))],
    )
    packet = IP(dst=target_ip) / TCP(**syn_fields)

    # Send and wait for the first answer.
    return sr1(packet)
27 |
def send_rst(syn_ack_packet):
    """Send an RST carrying the Timestamp option (0, 0).

    Args:
        syn_ack_packet: the reply to the SYN. Its TCP ack number is used as the
            RST's sequence number.

    The peer's timestamp is still extracted from the reply so the alternative
    option sets listed below can be swapped in for comparison.
    """
    destination = IP(dst=target_ip)

    # First Timestamp option found in the reply, else 0.
    ts_ecr = next(
        (opt[1][0] for opt in syn_ack_packet[TCP].options if opt[0] == 'Timestamp'),
        0,
    )

    # Active variant: timestamp value 0, echo reply 0.
    # Alternatives to compare against:
    #   [('Timestamp', (int(time.time()), 0))]
    #   [('Timestamp', (int(time.time())-2, ts_ecr))]
    #   [('Timestamp', (int(time.time()), ts_ecr))]
    #   [('MSS', 1460)]
    rst_options = [('Timestamp', (0, 0))]

    rst_segment = TCP(sport=source_port,
                      dport=target_port,
                      flags='R',
                      seq=syn_ack_packet[TCP].ack,
                      options=rst_options)

    send(destination / rst_segment)
66 |
def send_ack(syn_ack_packet):
    """Acknowledge the SYN+ACK with an ACK that carries a Timestamp option.

    Args:
        syn_ack_packet: the reply to the SYN. Its TCP ack number becomes this
            segment's sequence number and its TCP seq + 1 becomes the
            acknowledgement number.

    The Timestamp option sent is (current epoch seconds, 0).
    """
    destination = IP(dst=target_ip)
    now_option = ('Timestamp', (int(time.time()), 0))

    our_seq = syn_ack_packet[TCP].ack
    peer_next = syn_ack_packet[TCP].seq + 1

    segment = TCP(sport=source_port, dport=target_port, flags='A',
                  seq=our_seq, ack=peer_next, options=[now_option])

    send(destination / segment)
82 |
def main():
    """Run the SYN -> RST -> (0.1 s pause) -> ACK experiment.

    Sends a SYN via send_syn(). Only when the reply is a TCP segment with both
    SYN and ACK set does it continue with send_rst(), a 0.1 s sleep, and
    send_ack(). Any exception is caught and printed rather than propagated.
    """
    try:
        # Send the SYN and wait for the reply.
        print("Sending SYN...")
        syn_ack = send_syn()

        # Require BOTH SYN (0x02) and ACK (0x10). A bare `flags & 0x12` is
        # truthy when either bit is set, so an RST+ACK reply (0x14), for
        # example, would have been mistaken for a SYN+ACK.
        if syn_ack and TCP in syn_ack and (int(syn_ack[TCP].flags) & 0x12) == 0x12:
            print("Received SYN+ACK")

            print("Sending RST with timestamp=0...")
            send_rst(syn_ack)

            # Pause between the RST and the ACK.
            time.sleep(0.1)
            print("Sending ACK...")
            send_ack(syn_ack)

            print("Sequence completed")
        else:
            print("Did not receive proper SYN+ACK")

    except Exception as e:
        print(f"Error occurred: {e}")
107 |
108 | if __name__ == "__main__":
109 | main()
110 |
111 |
--------------------------------------------------------------------------------
/code/timestamp/tcp_check_req.bt:
--------------------------------------------------------------------------------
/*
 * Trace calls to tcp_check_req() that return 0 for RST+ACK segments addressed
 * to TCP port 8000, printing one row per matching call.
 */

BEGIN
{
    printf("Tracing tcp drops. Hit Ctrl-C to end.\n");
    printf("%-8s %-21s %-21s %-8s %-8s\n", "TIME", "SADDR:SPORT", "DADDR:DPORT", "STATE", "RET");

    // Socket state number -> printable name.
    @tcp_states[1] = "ESTAB";
    @tcp_states[2] = "SYN_SENT";
    @tcp_states[3] = "SYN_RECV";
    @tcp_states[4] = "FIN_WAIT1";
    @tcp_states[5] = "FIN_WAIT2";
    @tcp_states[6] = "TIME_WAIT";
    @tcp_states[7] = "CLOSE";
    @tcp_states[8] = "CLOSE_WAIT";
    @tcp_states[9] = "LAST_ACK";
    @tcp_states[10] = "LISTEN";
    @tcp_states[11] = "CLOSING";
    @tcp_states[12] = "NEW_SYN_RECV";
}

// Entry: remember the first two arguments so the return probe can inspect them.
kprobe:tcp_check_req
{
    @start[cpu, tid] = nsecs;
    @sk[cpu, tid] = arg0;
    @skb[cpu, tid] = arg1;
    // Printed for every call, before any port filtering.
    printf("%s\n", kstack);
}

// Return: report the call only when every filter matches.
kretprobe:tcp_check_req
{
    if (@start[cpu, tid]) {
        $sk = (struct sock *)@sk[cpu, tid];
        $skb = (struct sk_buff *)@skb[cpu, tid];
        $iph = (struct iphdr *)($skb->head + $skb->network_header);

        // Protocol number 6 = TCP. The header is read as an IPv4 header.
        if ($iph->protocol == 6) {
            $tcph = (struct tcphdr *)($skb->head + $skb->transport_header);
            $source_port = bswap($tcph->source);
            $dest_port = bswap($tcph->dest);

            // All filters are evaluated before anything is printed, so the
            // timestamp and the row always appear together. Previously
            // time() ran before the retval/RST/ACK checks and left orphan
            // timestamps in the output.
            if ($dest_port == 8000 && retval == 0 && $tcph->rst == 1 && $tcph->ack == 1) {
                $saddr = ntop($iph->saddr);
                $daddr = ntop($iph->daddr);
                $state = $sk->__sk_common.skc_state;
                $statestr = @tcp_states[$state];

                time("%H:%M:%S ");
                printf("%39s:%-6d %39s:%-6d %-10s %-10d\n", $saddr, $source_port, $daddr, $dest_port, $statestr, retval);
            }
        }
    }

    // Always reached now: the filters no longer `return` early, so the
    // per-thread entries are removed on every path.
    delete(@start[cpu, tid]);
    delete(@sk[cpu, tid]);
    delete(@skb[cpu, tid]);
}

END
{
    clear(@tcp_states);
    clear(@start);
    clear(@sk);
    clear(@skb);
}
72 |
--------------------------------------------------------------------------------
/code/timestamp/watch_netstat.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/python3
2 |
3 | """
4 | 每 2 秒读取一次 /proc/net/netstat 文件的内容
5 | 比较相邻两次读取的差异
6 | 只显示发生变化的项目
7 | 显示以下信息:
8 | 指标名称
9 | 当前值
10 | 变化量(与上次的差值)
11 | 变化率(百分比)
12 | 保存并显示最近8次的变化历史
13 | 使用清屏功能使输出在原位置刷新
14 | """
15 |
16 | import time
17 | from collections import defaultdict
18 | import os
19 |
def read_netstat(path='/proc/net/netstat'):
    """Parse a netstat-style counters file into a flat dict.

    The file is read as consecutive pairs of lines: a header line of names
    followed by a line of values. The first field of each line is a group
    label; a pair whose labels differ is skipped. Each counter is stored under
    the key "<label><name>" (the label is used exactly as it appears in the
    file).

    Args:
        path: file to read. Defaults to '/proc/net/netstat'.

    Returns:
        dict mapping counter key to int value, or None if the file cannot be
        read or a value is not an integer (the error is printed).
    """
    stats = {}
    try:
        with open(path, 'r') as f:
            lines = f.readlines()

        # Pair line 0 with 1, 2 with 3, ... zip() drops an unpaired trailing
        # line instead of raising IndexError and discarding the whole sample.
        for header_line, value_line in zip(lines[0::2], lines[1::2]):
            header = header_line.split()
            values = value_line.split()

            # Skip blank lines and pairs whose group labels do not match.
            if not header or not values or header[0] != values[0]:
                continue

            # zip() also tolerates a value row shorter than its header row.
            for name, raw_value in zip(header[1:], values[1:]):
                stats[f"{header[0]}{name}"] = int(raw_value)

    except Exception as e:
        print(f"Error reading netstat: {e}")
        return None

    return stats
46 |
def main():
    """Poll read_netstat() in a loop and print the counters that changed.

    Every iteration clears the screen, reads the counters, and, when both the
    current and the previous sample are available, prints each changed counter
    with its current value, delta and percentage change, followed by the last
    8 deltas recorded per counter. The loop sleeps 2 seconds between samples
    and stops on Ctrl-C.
    """
    # Sample from the previous iteration.
    last_stats = None

    # Counter key -> list of its most recent deltas.
    changes_history = defaultdict(list)

    try:
        while True:
            # Clear the screen (Unix/Linux).
            os.system('clear')

            current_stats = read_netstat()

            if current_stats and last_stats:
                print("\n变化项:")
                print("-" * 80)
                print(f"{'指标名称':<40} {'当前值':<15} {'变化量':<15} {'变化率'}")
                print("-" * 80)

                for key, current_value in current_stats.items():
                    # A counter missing from the previous sample has no
                    # baseline to diff against; skip it instead of raising
                    # KeyError and ending the monitor.
                    if key not in last_stats:
                        continue
                    last_value = last_stats[key]

                    if current_value != last_value:
                        change = current_value - last_value
                        change_rate = (change / last_value * 100) if last_value != 0 else float('inf')

                        changes_history[key].append(change)
                        # Keep only the 8 most recent deltas.
                        if len(changes_history[key]) > 8:
                            changes_history[key].pop(0)

                        print(f"{key:<40} {current_value:<15} {change:<15} {change_rate:.2f}%")

                print("\n最近变化趋势(最近8次变化):")
                print("-" * 80)
                for key, changes in changes_history.items():
                    if changes:
                        changes_str = " -> ".join(map(str, changes))
                        print(f"{key:<40} {changes_str}")

            last_stats = current_stats
            time.sleep(2)

    except KeyboardInterrupt:
        print("\n监控已停止")
98 |
99 | if __name__ == "__main__":
100 | main()
101 |
102 |
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/01dc036aca4b445e-01dc036aca4b445ed86e3e295bf245b8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/01dc036aca4b445e-01dc036aca4b445ed86e3e295bf245b8.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/0c6bbb5d4a10f40c-0c6bbb5d4a10f40c8b3c4ba6cab82292.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/0c6bbb5d4a10f40c-0c6bbb5d4a10f40c8b3c4ba6cab82292.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/159a331ff8cdd4b8-159a331ff8cdd4b8994dfe6a209d035f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/159a331ff8cdd4b8-159a331ff8cdd4b8994dfe6a209d035f.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/2452b0e753f83672-5f63b8e0-952c-47a2-8179-48793034f86b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/2452b0e753f83672-5f63b8e0-952c-47a2-8179-48793034f86b.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/2fbdd05162e9fd51-2fbdd05162e9fd51e803682b8a18cc51.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/2fbdd05162e9fd51-2fbdd05162e9fd51e803682b8a18cc51.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/3f5f1eeb0646a3af-3f5f1eeb0646a3af8afd6bbff2a9ea0b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/3f5f1eeb0646a3af-3f5f1eeb0646a3af8afd6bbff2a9ea0b.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/77ed9ba81f70f794-77ed9ba81f70f7940546f0a22dabf010.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/77ed9ba81f70f794-77ed9ba81f70f7940546f0a22dabf010.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/9179e08ac24ce3d5-9179e08ac24ce3d53e74b92dbd044906.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/9179e08ac24ce3d5-9179e08ac24ce3d53e74b92dbd044906.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/a5616904df3a5055-a5616904df3a505572d99d557b534db2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/a5616904df3a5055-a5616904df3a505572d99d557b534db2.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/bcf463efeb677d57-bcf463efeb677d5749d8d7571274ee79.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/bcf463efeb677d57-bcf463efeb677d5749d8d7571274ee79.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/c0849615ae525318-c0849615ae52531887ce6b0313d7d2d1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/c0849615ae525318-c0849615ae52531887ce6b0313d7d2d1.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--半连接队列和全连接队列/ec25ccb6cce8f554-ec25ccb6cce8f554b7ef6927f05bd530.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--半连接队列和全连接队列/ec25ccb6cce8f554-ec25ccb6cce8f554b7ef6927f05bd530.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/028c3cfe690f4f2e-image10-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/028c3cfe690f4f2e-image10-5.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/05d6357ed53c1c16-05d6357ed53c1c16f0dd0454251916ef.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/05d6357ed53c1c16-05d6357ed53c1c16f0dd0454251916ef.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/0db5c3684a931490-0db5c3684a9314907f9158ac15b6ac71.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/0db5c3684a931490-0db5c3684a9314907f9158ac15b6ac71.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/0f3050cd98db40a3-0f3050cd98db40a352410a11a521e8b2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/0f3050cd98db40a3-0f3050cd98db40a352410a11a521e8b2.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/15b7d6852e44fc17-15b7d6852e44fc179d60d76f322695c7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/15b7d6852e44fc17-15b7d6852e44fc179d60d76f322695c7.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/1984258c03009217-1984258c0300921799476777f5f0a38a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/1984258c03009217-1984258c0300921799476777f5f0a38a.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/1de3f2916346e390-1de3f2916346e390be55263d59f5730d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/1de3f2916346e390-1de3f2916346e390be55263d59f5730d.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/2e493d8dc32bb63f-2e493d8dc32bb63f2126375de6675351.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/2e493d8dc32bb63f-2e493d8dc32bb63f2126375de6675351.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/3d9e77f8c9b0cab1-3d9e77f8c9b0cab1484c870d2c0d2473.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/3d9e77f8c9b0cab1-3d9e77f8c9b0cab1484c870d2c0d2473.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/3dcfd469fe1e2f7e-3dcfd469fe1e2f7e1d938a5289b83826.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/3dcfd469fe1e2f7e-3dcfd469fe1e2f7e1d938a5289b83826.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/49e2635a7c4025d4-49e2635a7c4025d44b915a1f17dd272a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/49e2635a7c4025d4-49e2635a7c4025d44b915a1f17dd272a.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/4af4765c045e9eed-4af4765c045e9eed2e36d9760d4a2aba.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/4af4765c045e9eed-4af4765c045e9eed2e36d9760d4a2aba.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/4e2b2e12c754f01a-4e2b2e12c754f01a2f99f9f47dd5fd8e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/4e2b2e12c754f01a-4e2b2e12c754f01a2f99f9f47dd5fd8e.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/55cf9875d24d76a0-55cf9875d24d76a077c442327d54fa34.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/55cf9875d24d76a0-55cf9875d24d76a077c442327d54fa34.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/5ec50ecf25444e96-5ec50ecf25444e96d81fab975b5a79e6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/5ec50ecf25444e96-5ec50ecf25444e96d81fab975b5a79e6.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/67f280a1cf499ae3-67f280a1cf499ae388fc44d6418869a7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/67f280a1cf499ae3-67f280a1cf499ae388fc44d6418869a7.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/7ae26e844629258d-7ae26e844629258de173a05d5ad595f9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/7ae26e844629258d-7ae26e844629258de173a05d5ad595f9.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/d0e12e8bad876438-d0e12e8bad8764385549f9b391c62ab0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/d0e12e8bad876438-d0e12e8bad8764385549f9b391c62ab0.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/d188530df31712e8-d188530df31712e8341f5687a960743a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/d188530df31712e8-d188530df31712e8341f5687a960743a.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/d385a7dad76ec403-d385a7dad76ec4031dfb6c096bca434b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/d385a7dad76ec403-d385a7dad76ec4031dfb6c096bca434b.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/d7d3af2c03653e6c-d7d3af2c03653e6cf8ae2befa0022832.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/d7d3af2c03653e6c-d7d3af2c03653e6cf8ae2befa0022832.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/da48878ce0c01bcd-da48878ce0c01bcdedb1e6d6a6cc6d1c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/da48878ce0c01bcd-da48878ce0c01bcdedb1e6d6a6cc6d1c.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/ea04e40acda98667-ea04e40acda986675bf0ad0ea7b9b8ff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/ea04e40acda98667-ea04e40acda986675bf0ad0ea7b9b8ff.png
--------------------------------------------------------------------------------
/network/就是要你懂TCP--性能和发送接收Buffer的关系/ff025f076a4a2bc2-ff025f076a4a2bc2b1b13d11f32a97d3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/network/就是要你懂TCP--性能和发送接收Buffer的关系/ff025f076a4a2bc2-ff025f076a4a2bc2b1b13d11f32a97d3.png
--------------------------------------------------------------------------------
/others/【经验分享】Nginx问题排查必备知识.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | **假如日志欺骗了你,不要悲伤,看下这篇文章吧。**
4 | ## 1 前言
5 | 在日常排查问题或者同用户连调的时候,是否经常会遇到,在access_log 里看到用户请求很快,但用户却反馈很慢;
6 | 在日志中看到用户的请求都成功了,用户却反馈说有大量失败等一系列自己看到的和用户描述不一致的问题,有时候会
7 | 怀疑用户搞错了,等用户贴出截图的时候又怀疑自己搞错了,如果有遇到这类问题,这篇文章或许对你有帮助。
8 | 本文主要介绍了Nginx中关于“时间”及“缓存”的一些问题,介绍一些问题发生的原因,并从用户角度及服务端角度尝
9 | 试分析这些问题。
10 |
11 | ## 2 Nginx打access_log 时机
12 | 在接着往下介绍之前,先看下Nginx打access_log 的时机,清楚了这个后,再接着往下看会清晰很多。
13 | Nginx的access_log 是在Nginx“认为”这个请求结束后才打的,对于正常请求,Nginx会在请求最后一个字节发出
14 | 后认为请求结束;对于异常请求,Nginx会在判断连接超时或者异常断开、无法再发送和接收数据的时候认为请求结束。通常情况下可以认
15 | 为Nginx在请求结束后随即会打出日志。
16 |
17 | ## 3 如何理解Nginx的"请求最后一个字节发出"
18 | Nginx认为请求最后一个字节发出后,该请求就结束了,其实最后一个字节发出可以理解为最后一串数据发出,这里
19 | 是“发出”而不是用户收到,指的是将最后一串数据填到协议栈中,只要send 成功返回,Nginx就认为结束了,至于数
20 | 据是否被客户端收到那就是协议栈和网络上的事情了,Nginx不会去关心。
21 |
22 | ## 4 为什么服务端看到的延时同客户端不一致
23 | ### 4.1 服务端 request_time_msec 的含义
24 | 要搞清楚这个问题,首先我们要明确Nginx access_log 中的“request_time_msec” 字段到底表达了什么含义。
25 | 我们先看下官方文档是怎么说的:
26 | $request_time
27 | request processing time in seconds with a milliseconds resolution; time elapsed between
28 | the first bytes were read from the client and the log write after the last bytes were sent
29 | to the client
30 | 这个字段表示的是从请求的第一个字节开始到请求最后一个字节发出后所经历的时间。
31 | 这里其实包含如下几点信息:
32 | 1 建连的时间是不会被算进去的。
33 | 2 如果是HTTPS 请求,建连及HTTPS 握手的时间都不会被算进去。
34 | 3 最后一个字节发出后Nginx认为请求结束,数据仅仅是填在协议栈中,从协议栈Buffer中的数据发送给用户的这段时间
35 | 是不被算进去的。
36 | 4 连接挥手的过程是不会被算进去。
37 | 注:从长连接的角度去看,上述1、2、4的时间不被算进去还是好理解的。
38 |
39 |
40 |
41 | ### 4.2 客户端看到的E2E 时间
42 | 4.1 中分析的request_time_msec是从服务器端看到的请求E2E 时间,而用户看到的时间,假设用户用curl 去测试:
43 | time curl https://bucket.oss-cn-hangzhou.aliyuncs.com/object
44 | 那么上面4.1 提到的几点不会算到服务器端时间的计算逻辑里的,除了4都会被客户端计算进去。
45 | 针对延时不一致,下面我们从HTTP 的上传下载,具体分析一下这个延时区别,是否差,差多少。
46 |
47 | ### 4.3 上传类请求延时差异
48 | 针对于上传来说,服务器端和客户端看到的延时差异不大,相差一个握手/和最后返回的Header发送回去的时间。
49 | 握手到服务器端收到请求首字节 2rtt,请求完成后返回的HEADER 数据一般不会很大可以塞在1个cwnd 内发完,需要一个0.5
50 | 个 rtt,一共是2.5个rtt。 如果是长连接,忽略三次握手的话,那么看到的差异为1个rtt。
51 | 因此针对上传类请求,客户端和服务器端看到的延时差距为2.5 个rtt,如果是长连接(非连接上首个请求)的话差异为1个rtt。
52 |
53 | ### 4.4 下载类请求延时差异
54 | 关于下载请求的延时差异会稍微复杂一些。上传的情况下,服务器只会有HTTP 状态码和一些HTTP Header,通常一个rtt 就
55 | 可以发完。 而下载,通常服务器会有较多的数据发送给客户端,Nginx把最后一串数据填在协议栈的Buffer里,如果在Buffer
56 | 中的数据能在一个rtt内发完,那么同上传类请求一致,否则就会比上传类请求的差异大。至于协议栈Buffer 中最后一串数据花多
57 | 长时间能发送到客户端,这个就不太好估计了,取决于当时的网络状况及当时的拥塞窗口大小,需要具体情况具体分析。
58 | 在网络情况不错并且服务器端Buffer 配置较小情况下,通常差距不大,但是如果客户端网络差,而服务器端Buffer 配置较大
59 | 的情况下,差距会比较大。比如此时客户端网络比较差,只达到100KB/s, 而服务器端协议栈Buffer 配置的较大,为1M,Nginx
60 | 最后一串数据把1M Buffer 填满后Nginx认为请求已经结束了,而实际上客户端在10s 之后才完整的收到请求应答数据,才认为结
61 | 束。大家可以用wget 测试一下,分别观察下服务器端和客户端看到的请求时间:
62 | wget your-bucket.oss-cn-hangzhou.aliyuncs.com/tmp/1m-file --limit-rate=10k --debug
63 | 注: wget 这个限速是在应用层面做的,测试看到的时间差异除了服务端Buffer 的原因,还有客户端Buffer 的原因,数据到达客
64 | 户端协议栈,而应用因限速而迟迟不读。
65 |
66 | ### 4.5 总结
67 | 服务器端看到的E2E 时间“request_time_msec” 时间是Nginx收到请求的首字节开始,到最后一个字节写到协议栈的时间。
68 | 客户端看到的E2E时间相比服务器多了:客户端建联及HTTPS 握手时间、请求首字节发送到服务器的时间、外加Nginx认为请求结束
69 | 后协议栈将Buffer中的数据递送到客户端的时间。
70 | 因此当客户抱怨延时高而服务器端看到却很快的时候,可能客户说的也对,你看到的也对,这时候就需要根据上述分析,判断具体
71 | 是哪里导致客户端和服务器端看到延时差距,进而快速定位问题。
72 |
73 | **服务器端慢是真的慢,但是服务器端看到快,可不一定真的快。**
74 |
75 | ## 5 服务器端看到的请求成功和客户端看到的请求成功
76 | 接下来分析的都是小概率事件,正常情况下通常不会遇到,主要针对出问题时的分析。
77 | 服务器端看到的成功,是服务器端正确处理这个请求,并把数据发送到协议栈后,服务器就会认为请求已经成功。
78 | 客户端看到请求成功,是收到服务器端返回的状态码及完整的body 后才认为请求成功。
79 |
80 | ### 5.1 access_log 看到的200 OK
81 | access_log 里的状态码,只要请求的header 已经发出去,那么状态码就确定了,access_log 里面打出来的状态码也是确定的。
82 | 如果是上传类请求,access_log 里打印出状态码为200,那么请求一定是成功了(但是客户端不一定能感知到这个成功)。
83 | 如果是下载类请求,access_log 里打印出来的状态码是200,那么请求不一定成功,可能body 并未发完请求就异常结束了。
84 |
85 | ### 5.2 写到协议栈里的数据不一定能发送出去
86 | Nginx把数据写到协议栈的Buffer中后,从Nginx的角度来说,可以认为数据已经发往客户端了,但从实际角度来看,数据写
87 | 到协议栈仅仅是写到协议栈,至于写到协议栈的数据是否能真正被发送出去,是不一定的。在协议栈数据还没发出去之前可能网络中
88 | 断了,或者连接被reset 了,都有可能发生。这是造成客户端和服务器端看到有差异的一个主要原因。
89 |
90 | 有的同学会问,TCP 不是可靠的传输协议嘛,怎么会发不过去?建议看下这篇文章,就明白TCP 的可靠性具体指的是什么了
91 | https://blog.csdn.net/dog250/article/details/82177299
92 |
93 | ## 6 单连接最低下载速度
94 | ### 6.1 为什么会有最低下载速度限制
95 | 针对系统性能指标,通常我们会描述一个单连接峰值吞吐的数值,但是实际上还有一个最低速度的限制。那么这个最低速度是
96 | 怎么来的呢。
97 | 一个正常C/S 架构的系统,通常会有很多Buffer,会设置很多超时时间,针对Nginx会有send_timeout,recv_timeout,
98 | keepalive_timeout等各种超时限制。这就会造成系统会有一个最小下载速度的限制。
99 | 像上面描述的各类超时时间,其实是会随着各类网络事件触发设置及更新。在Linux 环境下,套接字可写就是其中一个事件,如果
100 | 套接字长时间不可写,超过Nginx配置的send_timeout,那么就会触发超时,引发Nginx主动断开连接,甚至reset 连接。Linux
101 | TCP 套接字在该套接字上的剩余Buffer空间大于总Buffer 1/3 才会被epoll 等“反应堆”返回可写,也就是说,如果Buffer 被
102 | 填满后,在timeout时间内,Buffer 中的数据1/3 还没被发出去的话,就会触发定时器超时,导致请求异常中断。假设Buffer 配置
103 | 的是512k,send_timeout配置的是30s。那么必须在30s 内发送出去170k才行,也就是最低要达到5.69KB/s 的速度才行。
104 |
105 | ### 6.2 如何获取系统最低下载速度
106 | 正常情况下,我们可以通过分析系统中各个Buffer 的大小及超时时间计算出一个理论的最低下载速度,但是一个复杂的系统,很难理
107 | 清楚或者找到各个位置的Buffer 大小及超时时间。因此我们可以利用wget 的 --limit-rate 功能进行二分测试,直到找到最低下
108 | 载速度的临界点,注意下载的时候文件不要选择太小,选择太小了会测试不出来,当然也不要太大,太大了会造成测试时间过长,设置为
109 | 系统最大buffer 的2倍左右即可。
110 |
111 | 二分测试过程:
112 | low_rate = 0k, up_rate = 100k
113 | deviation = 5k
114 | while up_rate - low_rate > deviation
115 | mid_rate = (low_rate + up_rate)/2
116 | wget url --limit-rate mid_rate
117 | if succ then
118 | up_rate = mid_rate
119 | else
120 | low_rate = mid_rate
121 |
122 | print low_rate, up_rate
123 |
124 | 如下是测试OSS 最低下载速度:
125 | 单连接持续 5k 以内速度必然出问题(一般持续30s+出问题)
126 | 单连接持续 5 ~10k 以内速度随机出问题,看系统状况(比较具有偶然性)
127 | 单连接持续 10k+ 基本不出问题
128 |
129 | 根据上述6.1 中的理论和6.2 中的测试方法,我们甚至可以测试出来服务器端设置的sndbuf 有多大。
130 |
131 |
132 | ### 6.3 如何解决
133 | 在正常情况下,这个最低下载速度并不会造成什么问题,因为大家都想方设法让速度更快,但是有些计算密集型的场景,可能会遇
134 | 到这个问题。比如说之前遇到一个OSS 客户,从OSS 一个文件中读10k 数据,处理30s,然后再读10k 数据,再处理30s,处理一段
135 | 时间后发现服务器端数据没发完就莫名其妙关闭连接了。其实就是遇到“最低下载速度问题”了。
136 | 针对上述情况,客户端不要在一个请求上一条连接反反复复缓慢读数据,如果文件不大,可以考虑一次性全读出来,放内存或者本
137 | 地再慢慢处理。如果文件太大,可以使用RangeGet,需要多少数据就从服务器端RangeGet 读多少。
138 | 注:针对上传类请求,通常来说没有速度下限要求。
139 |
140 | ### 6.4 为什么复现不出来
141 | 有同学使用wget/curl 的limit_rate 功能把连接速度限制到很低,但是复现不出来最低下载速度的问题,这是因为测
142 | 试的文件太小了,测试的文件大小需要比客户端的rcvbuf + 服务端的sndbuf 还要大才能测试出来,否则数据堆在两端的
143 | 协议栈里,是触发不到应用的超时时间限制的。
144 |
145 | ## 7 access_log 中的400 408 及499
146 | ### 7.1 产生原因
147 | 400 是很普通的错误码,但是在Nginx里也有不是普通“400” 的时候,在这里我们只介绍非普通400 的情况。
148 | 408 及499在Nginx中是不会作为错误码返回给用户的(除非upstream 返回了),只是Nginx利用了这两个状态码标识请求的一
149 | 种完成状态。这两种错误码都是和时间相关,但是是不同场景下产生,都是在服务端才能看到的状态,客户端是感知不到的。
150 |
151 | 400,如果用户请求数据还未发完之前,客户端主动断开或者连接异常断开(如被reset 掉),在Nginx的access_log 中计为400。
152 | 499,客户端关闭请求,在proxy 场景下确切的说是客户端先于proxy upstream 返回前断开,Nginx在做proxy 的情况下
153 | (fastcgi_pass/proxy_pass 等),同一请求链路上,客户端与Nginx的连接先于Nginx后端返回前断开,此时在Nginx
154 | access_log中计为499 的日志。
155 | 408,客户端请求超时,确切说客户端发送数据超时,客户端向服务器发送请求数据时中间因某种原因中断了一会,引起服务器端读数
156 | 据超时,此时在Nginx access_log 中会记为408。注意,发送header和发送body可能会有不同的超时时间。
157 |
158 | ### 7.2 如何复现
159 | 400 请求数据发完之前提前断开连接, nc 建立连接后输入完成Host 头部后Ctrl + c 断掉, 或者发送PUT 请求在body 没有发送
160 | 完成之前Ctrl + c 掉
161 | 408 客户端发送超时, nc 建立连接后输入完成Host 头部后等待连接超时, 或者在Body 发送完成之前等待连接超时
162 | 499 客户端在服务器返回之前提前关闭连接 直接Curl,在服务器返回之前Ctrl + c 掉, Nginx在等待upstream返回,此时客户
163 | 端连接已经断开. 可能你的手速没服务端处理的快,可以找一些服务器处理相对耗时的请求来复现,比如OSS的大图片处理。
164 |
165 | 注:用public-read-write权限的 bucket 进行测试
166 |
167 | ### 7.3 是否异常
168 | 一般正常情况下,400、408、499 这三个状态码出现的会比较少,日志中偶尔零星出现一些也不是什么大问题,如果大量出现,那
169 | 就可能出问题了。
170 | 如果日志中大量出现400,如果请求的request_time_msec 很小,优先排查是否是客户端问题,如果这个时间很大,请检查服务器
171 | 压力是否过大,是否有hang住情况。如果服务器端hang 住,请求在发送的时候数据堆在Nginx里,服务器端长时间不读,造成客户端
172 | 超时断开连接,此时Nginx会产生大量因客户端发送超时而提前断连造成的400.
173 | 如果日志中大量出现499,如果请求的request_time_msec 很小(ms 级别),需要排查是否是客户端问题,如果这个时间很大,
174 | 需要从两个方向排查:
175 | 1 检查用户请求,是否后端处理确实需要很长时间,而客户端设置的超时时间又很短,此时需要客户端调整超时时间,否则客户端
176 | 的重试可能会导致雪崩(如果没有限流的话)
177 | 2 检查服务器是否压力过大,是否有hang 住的情况,如果后端持续不返回客户端提前断开的话就会造成大量499.
178 |
179 | 这三个状态码出现,多多少少都是有些异常的,通常情况下,我们需要快速判断是服务器端的异常还是客户端的异常,从而快速定位问题。
180 |
181 | 当然上述描述的情况也不是绝对的,有时候需要特殊场景特殊分析。
182 |
183 |
184 |
185 | ## 8 总结
186 | 学会分析access_log 在日常调查问题中会方便很多,理解access_log 中一些特殊状态码的含义及出现的场景,会让调查问题事
187 | 半功倍。同时对C/S 系统上Buffer 的理解也可以加快调查问题的速度,同时指导设置Buffer 的大小,解决系统在大压力下出现的一些
188 | 性能及其他一些奇怪问题。
189 |
190 |
191 | **上述信息由笔者翻阅源码及问题排查经验所得,如有错漏,敬请指出**
192 |
193 |
194 |
195 |
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程.md:
--------------------------------------------------------------------------------
1 |
2 | # 10+倍性能提升全过程--优酷账号绑定淘宝账号的TPS从500到5400的优化历程
3 |
4 | ## 背景说明
5 |
6 | > 2016年的双11在淘宝上买买买的时候,天猫和优酷土豆一起做了联合促销,在天猫双11当天购物满XXX元就赠送优酷会员,这个过程需要用户在优酷侧绑定淘宝账号(登录优酷、提供淘宝账号,优酷调用淘宝API实现两个账号绑定)和赠送会员并让会员权益生效(看收费影片、免广告等等)
7 | >
8 | > 这里涉及到优酷的两个部门:Passport(在上海,负责登录、绑定账号,下文中的优化过程主要是Passport部分);会员(在北京,负责赠送会员,保证权益生效)
9 |
10 |
11 | > 在双11活动之前,Passport的绑定账号功能一直在运行,只是没有碰到过大促销带来的挑战
12 |
13 |
14 | ---
15 |
16 | 整个过程分为两大块:
17 |
18 | 1. 整个系统级别,包括网络和依赖服务的性能等,多从整个系统视角分析问题;
19 | 1. 单服务器内部的优化过程,将CPU从si/sy围赶us,然后在us从代码级别一举全歼。
20 |
21 | 系统级别都是最容易被忽视但是成效最明显的,代码层面都是很细致的力气活。
22 |
23 | 整个过程都是在对业务和架构不是非常了解的情况下做出的。
24 |
25 | ## 会员部分的架构改造
26 |
27 | - 接入中间件DRDS,让优酷的数据库支持拆分,分解MySQL压力
28 | - 接入中间件vipserver来支持负载均衡
29 | - 接入集团DRC来保障数据的高可用
30 | - 对业务进行改造支持Amazon的全链路压测
31 |
32 | ## 主要的压测过程
33 |
34 | 
35 |
36 | **上图是压测过程中主要的阶段中问题和改进,主要的问题和优化过程如下:**
37 |
38 | ```
39 | - docker bridge网络性能问题和网络中断si不均衡 (优化后:500->1000TPS)
40 | - 短连接导致的local port不够 (优化后:1000-3000TPS)
41 | - 生产环境snat单核导致的网络延时增大 (优化后生产环境能达到测试环境的3000TPS)
42 | - Spring MVC Path带来的过高的CPU消耗 (优化后:3000->4200TPS)
43 | - 其他业务代码的优化(比如异常、agent等) (优化后:4200->5400TPS)
44 |
45 | ```
46 |
47 | **优化过程中碰到的比如淘宝api调用次数限流等一些业务原因就不列出来了**
48 |
49 | ---
50 |
51 | ## 概述
52 |
53 | 由于用户进来后先要登录并且绑定账号,实际压力先到Passport部分,在这个过程中最开始单机TPS只能到500,经过N轮优化后基本能达到5400 TPS,下面主要是阐述这个优化过程
54 |
55 | ## Passport部分的压力
56 |
57 | ### Passport 核心服务分两个:
58 |
59 | - Login 主要处理登录请求
60 | - userservice 处理登录后的业务逻辑,比如将优酷账号和淘宝账号绑定
61 |
62 | 为了更好地利用资源每台物理机上部署三个docker 容器,跑在不同的端口上(8081、8082、8083),通过bridge网络来互相通讯
63 |
64 | ### Passport机器大致结构
65 |
66 | 
67 |
68 |
69 |
70 | ### userservice服务网络相关的各种问题
71 |
72 | ---
73 |
74 | #### 太多SocketConnect异常(如上图)
75 |
76 | 在userservice机器上通过netstat也能看到大量的SYN_SENT状态,如下图:
77 | 
78 |
79 | #### 因为docker bridge通过nat来实现,尝试去掉docker,让tomcat直接跑在物理机上
80 |
81 | 这时SocketConnect异常不再出现
82 | 
83 |
84 | #### 重新梳理一下网络流程
85 |
86 | docker(bridge)----短连接--->访问淘宝API(淘宝open api只能短连接访问),性能差,cpu都花在si上;
87 |
88 | 如果 docker(bridge)----长连接到宿主机的某个代理上(比如haproxy)-----短连接--->访问淘宝API, 性能就能好一点。问题可能是短连接放大了Docker bridge网络的性能损耗
89 |
90 | #### 当时看到的cpu si非常高,截图如下:
91 |
92 | 
93 |
94 | 去掉Docker后,性能有所提升,继续通过perf top看到内核态寻找可用的Local Port消耗了比较多的CPU,gif动态截图如下(可以点击看高清大图):
95 |
96 | 
97 |
98 | **注意图中ipv6_rcv_saddr_equal和inet_csk_get_port 总共占了30%的CPU** (系统态的CPU使用率高意味着共享资源有竞争或者I/O设备之间有大量的交互。)
99 |
100 | **一般来说一台机器默认配置的可用 Local Port 3万多个,如果是短连接的话,一个连接释放后默认需要60秒回收,30000/60 =500 这是大概的理论TPS值【这里只考虑连同一个server IP:port 的时候】**
101 |
102 | 这500的tps算是一个老中医的经验。不过有些系统调整过Local Port取值范围,比如从1024到65534,那么这个tps上限就是1000附近。
103 |
104 | 同时观察到这个时候CPU主要花在sy上,最理想肯定是希望CPU主要用在us上,截图如下:
105 | 
106 |
107 | **规则:性能优化要先把CPU从SI、SY上的消耗赶到US上去(通过架构、系统配置);然后提升 US CPU的效率(代码级别的优化)**
108 |
109 | sy占用了30-50%的CPU,这太不科学了,同时通过 netstat 分析连接状态,确实看到很多TIME_WAIT:
110 | 
111 |
112 | **cpu要花在us上,这部分才是我们代码吃掉的**
113 |
114 | ***于是让PE修改了tcp相关参数:降低 tcp_max_tw_buckets和开启tcp_tw_reuse,这个时候TPS能从1000提升到3000***
115 |
116 | 鼓掌,赶紧休息,迎接双11啊
117 |
118 | 
119 |
120 | ## 测试环境优化到3000 TPS后上线继续压测
121 |
122 | **居然性能又回到了500,太沮丧了**,其实最开始账号绑定慢,Passport这边就怀疑taobao api是不是在大压力下不稳定,一般都是认为自己没问题,有问题的一定是对方。我不觉得这有什么问题,要是知道自己有什么问题不早就优化掉了,但是这里缺乏证据支撑,也就是如果你觉得自己没有问题或者问题在对方,一定要拿出证据来(有证据那么大家可以就证据来讨论,而不是互相苍白地推诿)。
123 |
124 | 这个时候Passport更加理直气壮啊,好不容易在测试环境优化到3000,怎么一调taobao api就掉到500呢,这么点压力你们就扛不住啊。 但是taobao api那边给出调用数据都是1ms以内就返回了(alimonitor监控图表--拿证据说话)。
125 |
126 | 看到alimonitor给出的api响应时间图表后,我开始怀疑从优酷的机器到淘宝的机器中间链路上有瓶颈,但是需要设计方案来证明这个问题在链路上,要不各个环节都会认为自己没有问题的,问题就会卡死。但是当时Passport的开发也只能拿到Login和Userservice这两组机器的权限,中间的负载均衡、交换机都没有权限接触到。
127 |
128 | 在没有证据的情况下,肯定机房、PE配合你排查的欲望基本是没有的(被坑过很多回啊,你说我的问题,结果几天配合排查下来发现还是你程序的问题,凭什么我要每次都陪你玩?),所以我要给出证明问题出现在网络链路上,然后拿着这个证据跟网络的同学一起排查。
129 |
130 | 讲到这里我禁不住要插一句,在出现问题的时候,都认为自己没有问题这是正常反应,毕竟程序是看不见的,好多意料之外逻辑考虑不周全也是常见的,出现问题按照自己的逻辑自查的时候还是没有跳出之前的逻辑所以发现不了问题。但是好的程序员在问题的前面会尝试用各种手段去证明问题在哪里,而不是复读机一样我的逻辑是这样的,不可能出问题的。即使目的是证明问题在对方,只要能给出明确的证据都是负责任的,拿着证据才能理直气壮地说自己没有问题和干净地甩锅。
131 |
132 | **在尝试过tcpdump抓包、ping等各种手段分析后,设计了场景证明问题在中间链路上。**
133 |
134 | ### 设计如下三个场景证明问题在中间链路上:
135 |
136 | 1. 压测的时候在userservice ping 依赖服务的机器;
137 | 1. 将一台userservice机器从负载均衡上拿下来(没有压力),ping 依赖服务的机器;
138 | 1. 从公网上非我们机房的机器 ping 依赖服务的机器;
139 |
140 | 这个时候奇怪的事情发现了,压力一上来**场景1、2**的两台机器ping淘宝的rt都从30ms上升到100-150ms,**场景1** 的rt上升可以理解,但是**场景2**的rt上升不应该,同时**场景3**中ping淘宝在压力测试的情况下rt一直很稳定(说明压力下淘宝的机器没有问题),到此确认问题在优酷到淘宝机房的链路上有瓶颈,而且问题在优酷机房出口扛不住这么大的压力。于是从上海Passport的团队找到北京Passport的PE团队,确认在优酷调用taobao api的出口上使用了snat,PE到snat机器上看到snat只能使用单核,而且对应的核早就100%的CPU了,因为之前一直没有这么大的压力所以这个问题一直存在只是没有被发现。
141 |
142 | **于是PE去掉snat,再压的话 TPS稳定在3000左右**
143 |
144 | ---
145 |
146 | ## 到这里结束了吗? 从3000到5400TPS
147 |
148 | 优化到3000TPS的整个过程没有修改业务代码,只是通过修改系统配置、结构非常有效地把TPS提升了6倍,对于优化来说这个过程是最轻松,性价比也是非常高的。实际到这个时候也临近双11封网了,最终通过计算(机器数量*单机TPS)完全可以抗住双11的压力,所以最终双11运行的版本就是这样的。 但是有工匠精神的工程师是不会轻易放过这么好的优化场景和环境的(基线、机器、代码、工具都具备配套好了)
149 |
150 | **优化完环境问题后,3000TPS能把CPU US跑上去,于是再对业务代码进行优化也是可行的了**。
151 |
152 | ### 进一步挖掘代码中的优化空间
153 |
154 | 双11前的这段封网其实是比较无聊的,于是和Passport的开发同学们一起挖掘代码中的可以优化的部分。这个过程中使用到的主要工具是这三个:火焰图、perf、perf-map-java。相关链接:[http://www.brendangregg.com/perf.html](http://www.brendangregg.com/perf.html) ; [https://github.com/jrudolph/perf-map-agent](https://github.com/jrudolph/perf-map-agent)
155 |
156 | ### 通过Perf发现的一个SpringMVC 的性能问题
157 |
158 | 这个问题具体参考我之前发表的优化文章[http://www.atatech.org/articles/65232](http://www.atatech.org/articles/65232) 。 主要是通过火焰图发现spring mapping path消耗了过多CPU的性能问题,CPU热点都在methodMapping相关部分,于是修改代码去掉spring中的methodMapping解析后性能提升了40%,TPS能从3000提升到4200.
159 |
160 | ### 著名的fillInStackTrace导致的性能问题
161 |
162 | 代码中的第二个问题是我们程序中很多异常(fillInStackTrace),实际业务上没有这么多错误,应该是一些不重要的异常,不会影响结果,但是异常频率很高,对这种我们可以找到触发的地方,catch住,然后不要抛出去(也就是别触发fillInStackTrace),打印一行error日志就行,这块也能省出10%的CPU,对应到TPS也有几百的提升。
163 |
164 | 
165 |
166 | 部分触发fillInStackTrace的场景和具体代码行(点击看高清大图):
167 | 
168 |
169 | 对应的火焰图(点击看高清大图):
170 | 
171 |
172 | 
173 |
174 | ### 解析useragent 代码部分的性能问题
175 |
176 | 整个useragent调用堆栈和cpu占用情况,做了个汇总(useragent不启用TPS能从4700提升到5400)
177 | 
178 |
179 | 实际火焰图中比较分散:
180 | 
181 |
182 | **最终通过对代码的优化勉勉强强将TPS从3000提升到了5400(太不容易了,改代码过程太辛苦,不如改配置来得快)**
183 |
184 | 优化代码后压测tps可以跑到5400,截图:
185 |
186 | 
187 |
188 | ## 最后再次总结整个压测过程的问题和优化历程
189 |
190 | ```
191 | - docker bridge网络性能问题和网络中断si不均衡 (优化后:500->1000TPS)
192 | - 短连接导致的local port不够 (优化后:1000-3000TPS)
193 | - 生产环境snat单核导致的网络延时增大 (优化后能达到测试环境的3000TPS)
194 | - Spring MVC Path带来的过高的CPU消耗 (优化后:3000->4200TPS)
195 | - 其他业务代码的优化(比如异常、agent等) (优化后:4200->5400TPS)
196 |
197 |
198 |
199 | ```
200 |
201 | 
202 |
203 |
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/05703c168e63e968-05703c168e63e96821ea9f921d83712b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/05703c168e63e968-05703c168e63e96821ea9f921d83712b.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/2ae2cb8b0cb324b6-2ae2cb8b0cb324b68ca22c48c019e029.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/2ae2cb8b0cb324b6-2ae2cb8b0cb324b68ca22c48c019e029.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/2bb7395a2cc6833c-2bb7395a2cc6833c9c7587b38402a301.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/2bb7395a2cc6833c-2bb7395a2cc6833c9c7587b38402a301.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/2be2799d1eef982d-2be2799d1eef982d77e5c0a5c896a0e9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/2be2799d1eef982d-2be2799d1eef982d77e5c0a5c896a0e9.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/36ef4b16c3c400ab-36ef4b16c3c400abf6eb7e6b0fbb2f58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/36ef4b16c3c400ab-36ef4b16c3c400abf6eb7e6b0fbb2f58.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/38bb043c85c7b500-38bb043c85c7b50007609484c7bf5698.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/38bb043c85c7b500-38bb043c85c7b50007609484c7bf5698.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/4c1eff0f925f5997-4c1eff0f925f59977e2557acff5cf03b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/4c1eff0f925f5997-4c1eff0f925f59977e2557acff5cf03b.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/6b24a854d91aba4d-6b24a854d91aba4dcdbd4f0155683d93.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/6b24a854d91aba4d-6b24a854d91aba4dcdbd4f0155683d93.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/6ed62fd6b50ad278-6ed62fd6b50ad2785e5b57687d95ad6e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/6ed62fd6b50ad278-6ed62fd6b50ad2785e5b57687d95ad6e.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/7eb2cbb4afc2c7d7-7eb2cbb4afc2c7d7007c35304c95342a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/7eb2cbb4afc2c7d7-7eb2cbb4afc2c7d7007c35304c95342a.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/894bd736dd03060e-894bd736dd03060e89e3fa49cc98ae5e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/894bd736dd03060e-894bd736dd03060e89e3fa49cc98ae5e.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/8a4a97cb74724b8b-8a4a97cb74724b8baa3b90072a1914e0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/8a4a97cb74724b8b-8a4a97cb74724b8baa3b90072a1914e0.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/91353fb9c88116be-91353fb9c88116be3ff109e3528a4651.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/91353fb9c88116be-91353fb9c88116be3ff109e3528a4651.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/99bf952b880f1724-99bf952b880f17243953da790ff0e710.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/99bf952b880f1724-99bf952b880f17243953da790ff0e710.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/afacc681a9550cd0-afacc681a9550cd087838c2383be54c8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/afacc681a9550cd0-afacc681a9550cd087838c2383be54c8.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/b509b30218dd22e0-b509b30218dd22e03149985cf5e15f8e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/b509b30218dd22e0-b509b30218dd22e03149985cf5e15f8e.png
--------------------------------------------------------------------------------
/performance/10+倍性能提升全过程/fff502ca73e3112e-fff502ca73e3112e585560ffe4a4dbf1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/10+倍性能提升全过程/fff502ca73e3112e-fff502ca73e3112e585560ffe4a4dbf1.gif
--------------------------------------------------------------------------------
/performance/Nginx resueport 导致偶发性卡顿.md:
--------------------------------------------------------------------------------
1 | # 背景
2 |
3 | 从2018年开始,我们有个业务陆续接到反馈 Nginx 线上集群经常出现不响应或者偶发性的“超慢”请求。这种卡顿每天都有少量出现。而只有多个集群中的一个出现,其他压力更大的集群皆未出现。
4 | 业务结构比较简单:LVS->Nginx->后端,如图
5 | 
6 |
7 | 一些观察到的现象:
8 |
9 | - 出问题前不久升级 Nginx 配置,打开了 reuseport 功能
10 | - 在压力大的后端(upstream)服务环境不容易出现,后端压力轻对应的Nginx卡顿概率更高
11 | - 关闭 reuseport 后 问题少了很多
12 | - 失败的请求响应时间都是 0ms(Nginx日志不靠谱了)
13 | - 从 Nginx 日志上看,所有失败的健康检查请求都是0ms 的499 错误码(健康检查设置超时是2秒),但实际出问题的时候有5s-2分钟没有任何日志输出(Nginx卡了这么久)要么是Nginx卡住没去accept,要么是accept了没响应
14 | - 所有超时来自同一个worker(一个Nginx服务一般按照机器核数开启多个worker)
15 |
16 | 并且已知,卡顿的原因是打开 reuseport 后,新进来的请求可以由内核 hash 派发给一个 Nginx worker ,避免了锁争抢以及惊群。但如果网络条件足够好,压力足够低,Nginx worker 一直来不及读完 receive buffer 中的内容时,就无法切换并处理其他的 request,于是在新请求的客户端会观测到不间断的卡顿,而压力大的后端由于网络传输慢,经常卡顿,Nginx worker 反而有时间能处理别的请求。在调小 receive buffer 人为制造卡顿后该问题得以解决。
17 |
18 | # 目标
19 | 由于所述场景比较复杂,缺乏直接证据,打算通过构造一个较简单的环境来复现这个问题,并且在这个过程中抓包、观测Nginx worker的具体行为,验证这个假设。
20 |
21 | # 术语
22 | ## 快连接和慢连接
23 | - 快连接:通常是传输时间短、传输量小的连接,耗时通常是ms级别
24 | - 慢连接:通常是传输时间长、传输量大的连接,可以维持传输状态一段时间(如30s, 1min)
25 |
26 | 在本次场景复现过程中,这两种连接都是短连接,每次请求开始前都需要三次握手建立连接,结束后都需要四次挥手销毁连接
27 |
28 | ## Epoll
29 | Nginx使用了epoll模型,epoll 是多路复用的一种实现。在多路复用的场景下,一个task(process)会批量处理多个socket,哪个来了数据就去读那个。这就意味着要公平对待所有这些socket,不能阻塞在任何socket的”数据读”上,也就是说不能在阻塞模式下针对任何socket调用recv/recvfrom。
30 |
31 | epoll 每次循环为O(1) 操作,循环前会得到一个就绪队列,其中包含所有已经准备好的 socket stream(有数据可读),不需要循环全部 socket stream 读取数据,在循环后会将被读取数据的 stream 重新放回睡眠队列。睡眠队列中的 socket stream 有数据可读时,再唤醒加入到 就绪队列中。
32 |
33 | epoll 伪代码 (不包含唤醒、睡眠)
34 | ```
35 | while(true) {
36 | streamArr = getEpollReadyStream(); // 找到准备好的stream
37 | for(Stream i: streamArr) { // 循环准备好的stream
38 | doSomething();
39 | }
40 | }
41 | ```
42 | ## reuseport与惊群
43 | Nginx reuseport 选项解决惊群的问题:在 TCP 多进程/线程场景中(B 图),服务端如果所有新连接只保存在一个 listen socket 的全连接队列中,那么多个进程/线程去这个队列里获取(accept)新的连接,势必会出现多个进程/线程对一个公共资源的争抢,争抢过程中,大量资源的损耗,也就会发生惊群现象。
44 | 
45 | 而开启reuseport后(C 图),有多个 listener 共同 bind/listen 相同的 IP/PORT,也就是说每个进程/线程有一个独立的 listener,相当于每个进程/线程独享一个 listener 的全连接队列,新的连接请求由内核hash分配,不需要多个进程/线程竞争某个公共资源,能充分利用多核,减少竞争的资源消耗,效率自然提高了。
46 |
47 | 但同时也是由于这个分配机制,避免了上下文切换,在服务压力不大,网络情况足够好的情况下,进程/线程更有可能专注于持续读取某个慢连接数据而忽视快连接建立的请求,从而造成快连接方卡顿。
48 |
49 | # 复现过程
50 | ## 思路
51 | 1. 整体的架构是N个client->1个Nginx->N个server。因为卡顿原因和reuseport机制有关,和server数量无关,server数量设为任意数字都能复现,这里为了方便设成1。client数量设为2,为了将快连接和慢连接区分开便于抓包观测
52 | 2. 用慢连接制造卡顿环境,用快连接观测卡顿。在快连接客户端进行观测和抓包
53 | 3. 进程数量要足够少,使得同一个 worker 有几率分配到多个连接 `worker_processes 2`
54 | 4. 连接数目要足够多,慢连接数目>=进程数量,使得快连接在分配时,有一定概率分配到一个正在处理慢连接的worker上
55 | 5. reuseport: 这个配置要开启,卡顿现象才能观测到。`listen 8000 reuseport`
56 |
57 | ## 环境
58 | linux kernel version: 6.1
59 | linux image: amazon/al2023-ami-2023.0.20230419.0-kernel-6.1-x86_64
60 | instance type:
61 | 1X AWS t2.micro (1 vCPU, 1GiB RAM) – Nginx client(fast request)
62 | 3X AWS t3.micro (2 vCPU, 1GiB RAM) – Http server, Nginx server, Nginx client(slow request)
63 |
64 | ## 操作
65 | 1. 在server instance上放置一个 2GiB 大文件(0000000000000000.data)和一个 3MiB 小文件(server.pcap),并开启一个http server
66 | ```
67 | nohup python -m http.server 8000
68 | ```
69 | 2. 在Nginx instance上安装、配置好Nginx,并启动Nginx (注意要绑核!)
70 | ```
71 | # install
72 | sudo yum install nginx
73 | # config (/etc/nginx/nginx.conf)
74 | user nginx;
75 | worker_processes 2;
76 | error_log /var/log/nginx/error.log notice;
77 | pid /run/nginx.pid;
78 |
79 | include /usr/share/nginx/modules/*.conf;
80 |
81 | events {
82 | worker_connections 1024;
83 | }
84 |
85 | http {
86 | log_format main '$remote_addr [$time_local] "$request" '
87 | 'status=$status body_bytes_sent=$body_bytes_sent '
88 | 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"';
89 |
90 | access_log /var/log/nginx/access.log main;
91 |
92 | sendfile on;
93 | tcp_nopush on;
94 | keepalive_timeout 60;
95 | types_hash_max_size 4096;
96 |
97 | include /etc/nginx/mime.types;
98 | default_type application/octet-stream;
99 |
100 | # Load modular configuration files from the /etc/nginx/conf.d directory.
101 | # See http://nginx.org/en/docs/ngx_core_module.html#include
102 | # for more information.
103 | include /etc/nginx/conf.d/*.conf;
104 |
105 | server {
106 | listen 8000 reuseport;
107 | server_name server1;
108 | root /usr/share/nginx/html;
109 |
110 | # Load configuration files for the default server block.
111 | include /etc/nginx/default.d/*.conf;
112 |
113 | location / {
114 | proxy_pass http://172.31.86.252:8000; # server ip
115 | proxy_set_header Host $host;
116 | proxy_set_header X-Real-IP $remote_addr;
117 | }
118 |
119 | error_page 404 /404.html;
120 | location = /404.html {
121 | }
122 |
123 | error_page 500 502 503 504 /50x.html;
124 | location = /50x.html {
125 | }
126 | }
127 | }
128 | # start nginx
129 | sudo taskset -c 0 nginx
130 | ```
131 | 3. 启动慢连接client,开启4个下载进程并计时,测试脚本[在此](./Nginx%20reuseport%20%E5%AF%BC%E8%87%B4%E5%81%B6%E5%8F%91%E6%80%A7%E5%8D%A1%E9%A1%BF/script/get_big_file.sh)
132 | 4. 启动快连接client,开启1个下载进程并计时,抓包,测试脚本[在此](./Nginx%20reuseport%20%E5%AF%BC%E8%87%B4%E5%81%B6%E5%8F%91%E6%80%A7%E5%8D%A1%E9%A1%BF/script/get_small_file.sh)
133 | 需要注意的是此处使用了curl --max-time 1,意味着即使1s内文件没有下载完,也会自动终止。
134 | 5. 进入Nginx instance观察access.log
135 | 6. 关掉reuseport或者调小recv buffer大小,重试一次
136 |
137 | ## 结果
138 | ip mapping:
139 | ```
140 | 172.31.86.252: http server
141 | 172.31.89.152: nginx server
142 | 172.31.91.109: 快连接 client
143 | 172.31.92.10: 慢连接 client
144 | ```
145 | 1. 快连接client端:下载同一个小文件的下载时长有快有慢,方差很大,完整日志[在此](./Nginx%20reuseport%20%E5%AF%BC%E8%87%B4%E5%81%B6%E5%8F%91%E6%80%A7%E5%8D%A1%E9%A1%BF/log-bench/client-runtime.txt)
146 | ```
147 | [2023-05-31 08:27:32,127] runtime=1010
148 | [2023-05-31 08:27:33,140] runtime=1009
149 | [2023-05-31 08:27:34,152] runtime=38
150 | [2023-05-31 08:27:34,192] runtime=1011
151 | [2023-05-31 08:27:35,205] runtime=37
152 | [2023-05-31 08:27:35,245] runtime=1008
153 | [2023-05-31 08:27:36,256] runtime=57
154 | [2023-05-31 08:27:36,315] runtime=1011
155 | ```
156 | 2. 快连接client:无论耗时长短,抓包结果都显示存在不同程度卡顿,抓包文件[在此](./Nginx%20reuseport%20%E5%AF%BC%E8%87%B4%E5%81%B6%E5%8F%91%E6%80%A7%E5%8D%A1%E9%A1%BF/log-bench/nginx-case-client.pcap)
157 | 耗时长的下载过程
158 | 
159 | 耗时短的下载过程
160 | 
161 |
162 | 3. Nginx access.log 存在大量未下载完的200请求,和少量499请求,且499请求的耗时为0,access.log文件[在此](./Nginx%20reuseport%20%E5%AF%BC%E8%87%B4%E5%81%B6%E5%8F%91%E6%80%A7%E5%8D%A1%E9%A1%BF/log-bench/access.log.txt)
163 | 卡顿的日志建立连接时长(uct)在0.3-0.4s左右,超过1s的就出现499了
164 | ```
165 | 172.31.91.109 [31/May/2023:08:27:49 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.790 uct="0.413" uht="0.592" urt="0.791"
166 | 172.31.91.109 [31/May/2023:08:27:50 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.058 uct="0.000" uht="0.002" urt="0.053"
167 | 172.31.91.109 [31/May/2023:08:27:51 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.000 uct="-" uht="-" urt="0.000"
168 | 172.31.91.109 [31/May/2023:08:27:51 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.763 uct="0.400" uht="0.580" urt="0.763"
169 | 172.31.91.109 [31/May/2023:08:27:52 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.767 uct="0.480" uht="0.768" urt="0.768"
170 | 172.31.91.109 [31/May/2023:08:27:53 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=580007 rt=0.773 uct="0.330" uht="0.431" urt="0.773"
171 | 172.31.91.109 [31/May/2023:08:27:55 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.000 uct="-" uht="-" urt="0.000"
172 | 172.31.91.109 [31/May/2023:08:27:55 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.000 uct="-" uht="-" urt="0.000"
173 | ```
174 | 下载中途被关闭的连接(200),可以观测到Nginx server在客户端已经请求FIN并被ACK之后仍然在发送一些网络数据包,客户端非常迷惑,向Nginx发送RST
175 | 
176 | 未和Nginx建立连接就被关闭的连接(499),可以观测到连接始终没有被建立,在等待1s后客户端超时,主动请求关连接
177 | 
178 |
179 | 4. 限制Nginx server所在的instance的recv buffer大小,重新进行实验,可以观测到仍然有少量停顿,但整体耗时好了很多,不再有长达1s的卡顿,也不再有RST,完整日志[在此](./Nginx%20reuseport%20%E5%AF%BC%E8%87%B4%E5%81%B6%E5%8F%91%E6%80%A7%E5%8D%A1%E9%A1%BF/log-exp1/)
180 | ```
181 | sysctl -w net.ipv4.tcp_rmem="40960 40960 40960"
182 | ```
183 | client runtime log: 耗时稳定在50-100ms,比无慢连接、纯跑快连接时要大一倍(25-50ms)
184 | ```
185 | [2023-06-05 06:13:22,791] runtime=120
186 | [2023-06-05 06:13:22,913] runtime=82
187 | [2023-06-05 06:13:22,997] runtime=54
188 | [2023-06-05 06:13:23,054] runtime=61
189 | [2023-06-05 06:13:23,118] runtime=109
190 | [2023-06-05 06:13:23,229] runtime=58
191 | [2023-06-05 06:13:23,290] runtime=55
192 | [2023-06-05 06:13:23,347] runtime=79
193 | [2023-06-05 06:13:23,429] runtime=65
194 | [2023-06-05 06:13:23,497] runtime=53
195 | ```
196 | client 抓包结果:
197 | 
198 | Nginx access.log: 都发完了,而且发得很流畅,建立连接时间(uct)非常短
199 | ```
200 | 172.31.91.109 [05/Jun/2023:06:13:22 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.101 uct="0.001" uht="0.004" urt="0.101"
201 | 172.31.91.109 [05/Jun/2023:06:13:22 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.064 uct="0.001" uht="0.002" urt="0.064"
202 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.044 uct="0.000" uht="0.001" urt="0.044"
203 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.047 uct="0.000" uht="0.001" urt="0.047"
204 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.100 uct="0.000" uht="0.001" urt="0.099"
205 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.047 uct="0.000" uht="0.001" urt="0.047"
206 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.045 uct="0.001" uht="0.002" urt="0.045"
207 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.066 uct="0.000" uht="0.002" urt="0.066"
208 | ```
209 | 对于慢连接大文件下载时长略有影响:46s (无限制) vs 53s (有限制)
210 |
211 | 5. 关闭nginx reuseport
212 |
213 | 卡顿依然大量存在,但大多以连接能够建立但是下载不完的形式(200)出现,499较少,并且存在惊群现象,完整日志[在此](./Nginx%20reuseport%20%E5%AF%BC%E8%87%B4%E5%81%B6%E5%8F%91%E6%80%A7%E5%8D%A1%E9%A1%BF/log-exp2/)
214 | ```
215 | server {
216 | listen 8000;
217 | ```
218 | client runtime log:存在卡顿,和benchmark没有区别
219 | ```
220 | [2023-06-05 06:38:06,682] runtime=1008
221 | [2023-06-05 06:38:07,692] runtime=1008
222 | [2023-06-05 06:38:08,703] runtime=220
223 | [2023-06-05 06:38:08,926] runtime=112
224 | [2023-06-05 06:38:09,040] runtime=60
225 | [2023-06-05 06:38:09,103] runtime=865
226 | [2023-06-05 06:38:09,970] runtime=1009
227 | [2023-06-05 06:38:10,982] runtime=1008
228 | [2023-06-05 06:38:11,992] runtime=1009
229 | ```
230 | client抓包结果:存在卡顿,存在RST,和benchmark没有区别
231 | 
232 | 
233 | access.log:卡顿的日志连接时间比benchmark略短,在0.2-0.3s左右,出现499的情况少了但是依然会有
234 | ```
235 | 172.31.91.109 [05/Jun/2023:06:38:02 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.844 uct="0.362" uht="0.539" urt="0.845"
236 | 172.31.91.109 [05/Jun/2023:06:38:03 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.907 uct="0.334" uht="0.476" urt="0.906"
237 | 172.31.91.109 [05/Jun/2023:06:38:04 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=543900 rt=0.836 uct="0.319" uht="0.504" urt="0.836"
238 | 172.31.91.109 [05/Jun/2023:06:38:05 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.831 uct="0.161" uht="0.480" urt="0.830"
239 | 172.31.91.109 [05/Jun/2023:06:38:06 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=552849 rt=0.820 uct="0.180" uht="0.329" urt="0.819"
240 | 172.31.91.109 [05/Jun/2023:06:38:07 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.800 uct="0.122" uht="0.462" urt="0.800"
241 | 172.31.91.109 [05/Jun/2023:06:38:08 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=543900 rt=0.871 uct="0.251" uht="0.380" urt="0.871"
242 | ```
243 | 存在惊群现象,以下是Nginx worker进程的cpu使用率和上下文切换频率对比
244 | ```
245 | # 每5s输出一次统计结果
246 | pidstat -w -u 5
247 | ```
248 | 两者的cpu使用率和上下文切换频率差不多,但关闭reuseport后花在wait上的cpu时间明显增加(1.3-1.6% vs 2.8-3.0%),这就是惊群带来的性能损耗。原始文件:[开启reuseport](./Nginx%20reuseport%20%E5%AF%BC%E8%87%B4%E5%81%B6%E5%8F%91%E6%80%A7%E5%8D%A1%E9%A1%BF/log-bench/pidstat.txt),[关闭reuseport](./Nginx%20reuseport%20%E5%AF%BC%E8%87%B4%E5%81%B6%E5%8F%91%E6%80%A7%E5%8D%A1%E9%A1%BF/log-exp2/pidstat.txt)
249 | ```
250 | # 开启reuseport
251 | Average: UID PID %usr %system %guest %wait %CPU CPU Command
252 | Average: 992 2590 1.77 9.57 0.00 1.25 11.35 - nginx
253 | Average: 992 2591 1.37 5.75 0.00 1.62 7.12 - nginx
254 |
255 | Average: UID PID cswch/s nvcswch/s Command
256 | Average: 992 2590 179.18 49.64 nginx
257 | Average: 992 2591 342.51 9.87 nginx
258 |
259 | # 关闭reuseport
260 | Average: UID PID %usr %system %guest %wait %CPU CPU Command
261 | Average: 992 2788 1.02 8.02 0.00 2.80 9.04 - nginx
262 | Average: 992 2789 0.92 9.07 0.00 2.97 9.99 - nginx
263 |
264 | Average: UID PID cswch/s nvcswch/s Command
265 | Average: 992 2788 159.06 28.68 nginx
266 | Average: 992 2789 250.26 22.93 nginx
267 | ```
268 | 惊群对于慢连接大文件下载时长略有影响:46s (开reuseport) vs 53s (关reuseport)
269 |
270 | 6. 其他的观察
271 |
272 | 最初复现的场景是所有的instance都是t2.micro,但开2个慢连接进程时比较难复现,开4个进程又太容易触发限流,所以开始考虑用大一些又没那么容易限流的instance型号。考虑到AWS是通过间歇丢包来限速的,慢连接进程数量并非越大越好,引发限速后反而会造成网络连接不畅,造成慢连接卡顿,使得快连接卡顿反而不容易观测。最后选择将慢连接全链路改成t3.micro,结果好复现多了。
273 |
274 | 可以观察到access.log上有一些499的连接,各项计时也都是0。这其实是因为计时也是通过worker进行的:只有进行epoll和上下文切换时才会在日志上打入时间信息,worker如果一直不进行切换,计时就会失真,于是就会看到日志上计时也是0的现象。
275 |
276 | # 结论
277 | 1. reuseport是Nginx避免惊群的优秀feature,应该开启
278 | 2. 开启reuseport后如果网络情况非常好且后端服务压力不大,且存在大量慢连接时,会造成快连接卡顿,这是Nginx的worker-epoll架构带来的,原因是recv buffer一直读不完,缺乏epoll和上下文切换的条件来接受新请求、同时给多个连接发送包
279 | 3. 减小recv buffer通过人为制造卡顿,提供了epoll切换连接的条件,可以很大程度上缓解这个问题,同时带来的负面效果是有一定性能损耗。但卡顿无法根除,只能控制在可接受范围内
280 |
281 | # 参考资料
282 | 1. [Nginx 惊群 – wenfh2020](https://wenfh2020.com/2021/09/29/nginx-thundering-herd/)
283 | 2. [Nginx reuseport – wenfh2020](https://wenfh2020.com/2021/10/12/thundering-herd-tcp-reuseport/)
284 | 3. [Epoll – wenfh2020](https://wenfh2020.com/2021/11/21/question-nginx-epoll-et/)
285 | 4. [上下文切换的案例以及CPU使用率 – cnhkzyy](https://www.cnblogs.com/my_captain/p/12667016.html)
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/arch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/arch.jpg
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/benchmark-pkg-cature1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/benchmark-pkg-cature1.png
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/benchmark-pkg-cature2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/benchmark-pkg-cature2.png
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/benchmark-pkg-cature3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/benchmark-pkg-cature3.png
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/benchmark-pkg-cature4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/benchmark-pkg-cature4.png
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/exp1-pkg-cature1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/exp1-pkg-cature1.png
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/exp2-pkg-cature1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/exp2-pkg-cature1.png
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/exp2-pkg-cature2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/exp2-pkg-cature2.png
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-bench/access.log.txt:
--------------------------------------------------------------------------------
1 | 172.31.91.109 [31/May/2023:08:27:36 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.045 uct="0.000" uht="0.001" urt="0.041"
2 | 172.31.91.109 [31/May/2023:08:27:35 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.000 uct="-" uht="-" urt="0.000"
3 | 172.31.91.109 [31/May/2023:08:27:36 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=1898755 rt=1.600 uct="0.221" uht="0.284" urt="0.829"
4 | 172.31.91.109 [31/May/2023:08:27:37 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.899 uct="0.418" uht="0.559" urt="0.899"
5 | 172.31.91.109 [31/May/2023:08:27:38 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.650 uct="0.449" uht="0.649" urt="0.649"
6 | 172.31.91.109 [31/May/2023:08:27:39 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.927 uct="0.437" uht="0.638" urt="0.927"
7 | 172.31.91.109 [31/May/2023:08:27:40 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=1215075 rt=0.900 uct="0.212" uht="0.311" urt="0.781"
8 | 172.31.91.109 [31/May/2023:08:27:41 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.629 uct="-" uht="-" urt="0.628"
9 | 172.31.91.109 [31/May/2023:08:27:42 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.791 uct="0.057" uht="-" urt="0.791"
10 | 172.31.91.109 [31/May/2023:08:27:43 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.686 uct="0.428" uht="-" urt="0.685"
11 | 172.31.91.109 [31/May/2023:08:27:44 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=515891 rt=0.699 uct="0.279" uht="0.379" urt="0.699"
12 | 172.31.91.109 [31/May/2023:08:27:45 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=534951 rt=0.857 uct="0.206" uht="0.347" urt="0.857"
13 | 172.31.91.109 [31/May/2023:08:27:46 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=2126089 rt=0.919 uct="0.204" uht="0.320" urt="0.668"
14 | 172.31.91.109 [31/May/2023:08:27:47 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=1394055 rt=0.667 uct="0.014" uht="0.028" urt="0.537"
15 | 172.31.91.109 [31/May/2023:08:27:48 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=110387 rt=0.709 uct="0.496" uht="0.709" urt="0.709"
16 | 172.31.91.109 [31/May/2023:08:27:49 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.790 uct="0.413" uht="0.592" urt="0.791"
17 | 172.31.91.109 [31/May/2023:08:27:50 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.058 uct="0.000" uht="0.002" urt="0.053"
18 | 172.31.91.109 [31/May/2023:08:27:51 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.000 uct="-" uht="-" urt="0.000"
19 | 172.31.91.109 [31/May/2023:08:27:51 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.763 uct="0.400" uht="0.580" urt="0.763"
20 | 172.31.91.109 [31/May/2023:08:27:52 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.767 uct="0.480" uht="0.768" urt="0.768"
21 | 172.31.91.109 [31/May/2023:08:27:53 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=580007 rt=0.773 uct="0.330" uht="0.431" urt="0.773"
22 | 172.31.91.109 [31/May/2023:08:27:55 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.000 uct="-" uht="-" urt="0.000"
23 | 172.31.91.109 [31/May/2023:08:27:55 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.000 uct="-" uht="-" urt="0.000"
24 | 172.31.91.109 [31/May/2023:08:27:56 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=534951 rt=0.851 uct="0.251" uht="0.401" urt="0.851"
25 | 172.31.91.109 [31/May/2023:08:27:57 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=2047332 rt=0.800 uct="0.231" uht="0.351" urt="0.681"
26 | 172.31.91.109 [31/May/2023:08:27:57 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.039 uct="0.003" uht="0.005" urt="0.040"
27 | 172.31.91.109 [31/May/2023:08:27:57 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.105 uct="0.016" uht="0.022" urt="0.082"
28 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.109 uct="0.024" uht="0.061" urt="0.109"
29 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.011 uct="0.001" uht="0.002" urt="0.011"
30 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.014 uct="0.000" uht="0.002" urt="0.012"
31 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.000" uht="0.001" urt="0.009"
32 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.015 uct="0.001" uht="0.002" urt="0.015"
33 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.015 uct="0.000" uht="0.001" urt="0.012"
34 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.052 uct="0.027" uht="0.032" urt="0.052"
35 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.018 uct="0.000" uht="0.001" urt="0.015"
36 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.018 uct="0.000" uht="0.001" urt="0.014"
37 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.055 uct="0.002" uht="0.005" urt="0.055"
38 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.014 uct="0.002" uht="0.003" urt="0.013"
39 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.052 uct="0.002" uht="0.004" urt="0.045"
40 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.018 uct="0.001" uht="0.001" urt="0.019"
41 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.013 uct="0.000" uht="0.001" urt="0.012"
42 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.014 uct="0.000" uht="0.001" urt="0.013"
43 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.021 uct="0.008" uht="0.010" urt="0.021"
44 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.008 uct="0.000" uht="0.001" urt="0.007"
45 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.020 uct="0.000" uht="0.001" urt="0.007"
46 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.028 uct="0.004" uht="0.006" urt="0.029"
47 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.016 uct="0.000" uht="0.001" urt="0.015"
48 | 172.31.91.109 [31/May/2023:08:27:58 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.069 uct="0.023" uht="0.025" urt="0.069"
49 | 172.31.91.109 [31/May/2023:08:27:59 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.109 uct="0.004" uht="0.007" urt="0.108"
50 | 172.31.91.109 [31/May/2023:08:27:59 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.033 uct="0.002" uht="0.010" urt="0.032"
51 | 172.31.91.109 [31/May/2023:08:27:59 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.421 uct="0.025" uht="0.034" urt="0.422"
52 | 172.31.91.109 [31/May/2023:08:27:59 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.114 uct="0.000" uht="0.019" urt="0.114"
53 | 172.31.91.109 [31/May/2023:08:27:59 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.021 uct="0.001" uht="0.002" urt="0.021"
54 | 172.31.91.109 [31/May/2023:08:27:59 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.050 uct="0.024" uht="0.027" urt="0.050"
55 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.037 uct="0.003" uht="0.004" urt="0.025"
56 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.036 uct="0.001" uht="0.002" urt="0.027"
57 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.080 uct="0.002" uht="0.010" urt="0.081"
58 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.056 uct="0.002" uht="0.005" urt="0.054"
59 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.001" uht="0.002" urt="0.010"
60 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.007 uct="0.000" uht="0.001" urt="0.007"
61 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.019 uct="0.000" uht="0.001" urt="0.013"
62 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.001" uht="0.002" urt="0.009"
63 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.008 uct="0.000" uht="0.001" urt="0.008"
64 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.019 uct="0.001" uht="0.002" urt="0.013"
65 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.127 uct="0.018" uht="0.034" urt="0.127"
66 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.026 uct="0.000" uht="0.004" urt="0.026"
67 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.000" uht="0.001" urt="0.011"
68 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.014 uct="0.000" uht="0.001" urt="0.014"
69 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.001" uht="0.002" urt="0.010"
70 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.011 uct="0.000" uht="0.001" urt="0.006"
71 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.011 uct="0.000" uht="0.001" urt="0.010"
72 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.011 uct="0.000" uht="0.001" urt="0.010"
73 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.001" uht="0.002" urt="0.009"
74 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.008 uct="0.000" uht="0.001" urt="0.007"
75 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.008 uct="0.000" uht="0.001" urt="0.007"
76 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.000" uht="0.001" urt="0.009"
77 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.008 uct="0.000" uht="0.001" urt="0.009"
78 | 172.31.91.109 [31/May/2023:08:28:00 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.000" uht="0.001" urt="0.008"
79 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.012 uct="0.001" uht="0.002" urt="0.013"
80 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.000" uht="0.001" urt="0.010"
81 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.021 uct="0.000" uht="0.001" urt="0.018"
82 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.025 uct="0.002" uht="0.006" urt="0.025"
83 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.015 uct="0.002" uht="0.003" urt="0.015"
84 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.028 uct="0.005" uht="0.006" urt="0.028"
85 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.028 uct="0.000" uht="0.001" urt="0.027"
86 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.036 uct="0.000" uht="0.001" urt="0.035"
87 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.028 uct="0.001" uht="0.002" urt="0.028"
88 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.066 uct="0.002" uht="0.005" urt="0.049"
89 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.025 uct="0.000" uht="0.001" urt="0.019"
90 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.001" uht="0.002" urt="0.009"
91 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.001" uht="0.001" urt="0.009"
92 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.000" uht="0.001" urt="0.007"
93 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.008 uct="0.000" uht="0.001" urt="0.008"
94 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.000" uht="0.001" urt="0.008"
95 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.000" uht="0.001" urt="0.008"
96 | 172.31.91.109 [31/May/2023:08:28:01 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.013 uct="0.000" uht="0.001" urt="0.010"
97 | 172.31.92.10 [31/May/2023:08:28:08 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=1725506917 rt=46.019 uct="0.001" uht="0.002" urt="46.020"
98 | 172.31.92.10 [31/May/2023:08:28:10 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=1585081583 rt=46.161 uct="0.001" uht="0.009" urt="46.162"
99 | 172.31.92.10 [31/May/2023:08:28:11 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=1884114736 rt=46.402 uct="0.000" uht="0.001" urt="46.401"
100 | 172.31.92.10 [31/May/2023:08:28:12 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=2037825028 rt=46.613 uct="0.000" uht="0.001" urt="46.614"
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-bench/client-runtime.txt:
--------------------------------------------------------------------------------
1 | [2023-05-31 08:27:32,127] runtime=1010
2 | [2023-05-31 08:27:33,140] runtime=1009
3 | [2023-05-31 08:27:34,152] runtime=38
4 | [2023-05-31 08:27:34,192] runtime=1011
5 | [2023-05-31 08:27:35,205] runtime=37
6 | [2023-05-31 08:27:35,245] runtime=1008
7 | [2023-05-31 08:27:36,256] runtime=57
8 | [2023-05-31 08:27:36,315] runtime=1011
9 | [2023-05-31 08:27:37,328] runtime=1010
10 | [2023-05-31 08:27:38,341] runtime=1009
11 | [2023-05-31 08:27:39,353] runtime=1008
12 | [2023-05-31 08:27:40,364] runtime=1010
13 | [2023-05-31 08:27:41,376] runtime=1010
14 | [2023-05-31 08:27:42,389] runtime=1010
15 | [2023-05-31 08:27:43,402] runtime=1009
16 | [2023-05-31 08:27:44,413] runtime=1009
17 | [2023-05-31 08:27:45,425] runtime=1007
18 | [2023-05-31 08:27:46,435] runtime=1009
19 | [2023-05-31 08:27:47,447] runtime=1010
20 | [2023-05-31 08:27:48,460] runtime=1010
21 | [2023-05-31 08:27:49,472] runtime=1010
22 | [2023-05-31 08:27:50,485] runtime=87
23 | [2023-05-31 08:27:50,575] runtime=1010
24 | [2023-05-31 08:27:51,587] runtime=1010
25 | [2023-05-31 08:27:52,600] runtime=1008
26 | [2023-05-31 08:27:53,611] runtime=1010
27 | [2023-05-31 08:27:54,623] runtime=1010
28 | [2023-05-31 08:27:55,636] runtime=1009
29 | [2023-05-31 08:27:56,647] runtime=1008
30 | [2023-05-31 08:27:57,658] runtime=50
31 | [2023-05-31 08:27:57,711] runtime=236
32 | [2023-05-31 08:27:57,950] runtime=148
33 | [2023-05-31 08:27:58,101] runtime=22
34 | [2023-05-31 08:27:58,125] runtime=26
35 | [2023-05-31 08:27:58,154] runtime=19
36 | [2023-05-31 08:27:58,176] runtime=25
37 | [2023-05-31 08:27:58,204] runtime=26
38 | [2023-05-31 08:27:58,233] runtime=99
39 | [2023-05-31 08:27:58,334] runtime=29
40 | [2023-05-31 08:27:58,366] runtime=30
41 | [2023-05-31 08:27:58,399] runtime=107
42 | [2023-05-31 08:27:58,509] runtime=28
43 | [2023-05-31 08:27:58,539] runtime=66
44 | [2023-05-31 08:27:58,608] runtime=32
45 | [2023-05-31 08:27:58,642] runtime=24
46 | [2023-05-31 08:27:58,668] runtime=27
47 | [2023-05-31 08:27:58,697] runtime=35
48 | [2023-05-31 08:27:58,735] runtime=18
49 | [2023-05-31 08:27:58,755] runtime=32
50 | [2023-05-31 08:27:58,790] runtime=60
51 | [2023-05-31 08:27:58,852] runtime=26
52 | [2023-05-31 08:27:58,881] runtime=144
53 | [2023-05-31 08:27:59,027] runtime=183
54 | [2023-05-31 08:27:59,213] runtime=44
55 | [2023-05-31 08:27:59,260] runtime=468
56 | [2023-05-31 08:27:59,731] runtime=132
57 | [2023-05-31 08:27:59,866] runtime=33
58 | [2023-05-31 08:27:59,902] runtime=98
59 | [2023-05-31 08:28:00,003] runtime=50
60 | [2023-05-31 08:28:00,056] runtime=62
61 | [2023-05-31 08:28:00,120] runtime=98
62 | [2023-05-31 08:28:00,221] runtime=68
63 | [2023-05-31 08:28:00,292] runtime=20
64 | [2023-05-31 08:28:00,315] runtime=20
65 | [2023-05-31 08:28:00,337] runtime=33
66 | [2023-05-31 08:28:00,373] runtime=20
67 | [2023-05-31 08:28:00,396] runtime=20
68 | [2023-05-31 08:28:00,419] runtime=31
69 | [2023-05-31 08:28:00,453] runtime=197
70 | [2023-05-31 08:28:00,653] runtime=39
71 | [2023-05-31 08:28:00,695] runtime=23
72 | [2023-05-31 08:28:00,721] runtime=26
73 | [2023-05-31 08:28:00,750] runtime=20
74 | [2023-05-31 08:28:00,773] runtime=23
75 | [2023-05-31 08:28:00,799] runtime=21
76 | [2023-05-31 08:28:00,823] runtime=23
77 | [2023-05-31 08:28:00,849] runtime=21
78 | [2023-05-31 08:28:00,873] runtime=20
79 | [2023-05-31 08:28:00,896] runtime=20
80 | [2023-05-31 08:28:00,919] runtime=22
81 | [2023-05-31 08:28:00,943] runtime=19
82 | [2023-05-31 08:28:00,965] runtime=19
83 | [2023-05-31 08:28:00,987] runtime=24
84 | [2023-05-31 08:28:01,014] runtime=24
85 | [2023-05-31 08:28:01,041] runtime=32
86 | [2023-05-31 08:28:01,076] runtime=40
87 | [2023-05-31 08:28:01,119] runtime=42
88 | [2023-05-31 08:28:01,163] runtime=43
89 | [2023-05-31 08:28:01,208] runtime=40
90 | [2023-05-31 08:28:01,251] runtime=66
91 | [2023-05-31 08:28:01,320] runtime=45
92 | [2023-05-31 08:28:01,368] runtime=79
93 | [2023-05-31 08:28:01,450] runtime=36
94 | [2023-05-31 08:28:01,489] runtime=25
95 | [2023-05-31 08:28:01,517] runtime=21
96 | [2023-05-31 08:28:01,541] runtime=21
97 | [2023-05-31 08:28:01,565] runtime=19
98 | [2023-05-31 08:28:01,586] runtime=21
99 | [2023-05-31 08:28:01,610] runtime=22
100 | [2023-05-31 08:28:01,635] runtime=26
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-bench/nginx-case-client.pcap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/log-bench/nginx-case-client.pcap
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-bench/readme.md:
--------------------------------------------------------------------------------
1 | benchmark log
2 | - reuseport on
3 | - no recv buffer limit
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-exp1/access.log.txt:
--------------------------------------------------------------------------------
1 | 172.31.91.109 [05/Jun/2023:06:13:22 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.101 uct="0.001" uht="0.004" urt="0.101"
2 | 172.31.91.109 [05/Jun/2023:06:13:22 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.064 uct="0.001" uht="0.002" urt="0.064"
3 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.044 uct="0.000" uht="0.001" urt="0.044"
4 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.047 uct="0.000" uht="0.001" urt="0.047"
5 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.100 uct="0.000" uht="0.001" urt="0.099"
6 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.047 uct="0.000" uht="0.001" urt="0.047"
7 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.045 uct="0.001" uht="0.002" urt="0.045"
8 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.066 uct="0.000" uht="0.002" urt="0.066"
9 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.055 uct="0.000" uht="0.001" urt="0.055"
10 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.042 uct="0.001" uht="0.002" urt="0.043"
11 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.065 uct="0.000" uht="0.002" urt="0.065"
12 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.043 uct="0.000" uht="0.001" urt="0.043"
13 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.049 uct="0.000" uht="0.001" urt="0.049"
14 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.059 uct="0.000" uht="0.002" urt="0.059"
15 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.062 uct="0.001" uht="0.002" urt="0.062"
16 | 172.31.91.109 [05/Jun/2023:06:13:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.072 uct="0.000" uht="0.002" urt="0.072"
17 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.058 uct="0.000" uht="0.001" urt="0.058"
18 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.071 uct="0.001" uht="0.002" urt="0.071"
19 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.063 uct="0.001" uht="0.003" urt="0.064"
20 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.046 uct="0.000" uht="0.001" urt="0.045"
21 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.077 uct="0.000" uht="0.002" urt="0.078"
22 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.051 uct="0.001" uht="0.002" urt="0.052"
23 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.069 uct="0.001" uht="0.002" urt="0.069"
24 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.047 uct="0.001" uht="0.002" urt="0.047"
25 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.076 uct="0.000" uht="0.001" urt="0.075"
26 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.062 uct="0.002" uht="0.004" urt="0.062"
27 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.056 uct="0.002" uht="0.005" urt="0.056"
28 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.054 uct="0.000" uht="0.001" urt="0.054"
29 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.042 uct="0.000" uht="0.002" urt="0.042"
30 | 172.31.91.109 [05/Jun/2023:06:13:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.054 uct="0.000" uht="0.001" urt="0.053"
31 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.073 uct="0.001" uht="0.002" urt="0.073"
32 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.127 uct="0.001" uht="0.002" urt="0.127"
33 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.054 uct="0.000" uht="0.001" urt="0.054"
34 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.055 uct="0.001" uht="0.002" urt="0.055"
35 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.042 uct="0.000" uht="0.001" urt="0.042"
36 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.041 uct="0.000" uht="0.002" urt="0.041"
37 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.085 uct="0.000" uht="0.001" urt="0.085"
38 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.050 uct="0.000" uht="0.001" urt="0.049"
39 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.045 uct="0.000" uht="0.002" urt="0.045"
40 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.055 uct="0.000" uht="0.001" urt="0.056"
41 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.061 uct="0.002" uht="0.004" urt="0.062"
42 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.066 uct="0.000" uht="0.001" urt="0.065"
43 | 172.31.91.109 [05/Jun/2023:06:13:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.063 uct="0.003" uht="0.005" urt="0.064"
44 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.039 uct="0.001" uht="0.002" urt="0.039"
45 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.073 uct="0.000" uht="0.001" urt="0.073"
46 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.043 uct="0.000" uht="0.002" urt="0.043"
47 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.053 uct="0.000" uht="0.001" urt="0.053"
48 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.062 uct="0.000" uht="0.001" urt="0.062"
49 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.054 uct="0.002" uht="0.004" urt="0.054"
50 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.058 uct="0.000" uht="0.002" urt="0.058"
51 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.054 uct="0.000" uht="0.001" urt="0.053"
52 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.075 uct="0.000" uht="0.008" urt="0.075"
53 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.039 uct="0.000" uht="0.001" urt="0.039"
54 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.060 uct="0.000" uht="0.001" urt="0.060"
55 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.069 uct="0.000" uht="0.002" urt="0.069"
56 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.041 uct="0.001" uht="0.002" urt="0.042"
57 | 172.31.91.109 [05/Jun/2023:06:13:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.055 uct="0.000" uht="0.002" urt="0.055"
58 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.053 uct="0.000" uht="0.001" urt="0.053"
59 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.068 uct="0.000" uht="0.001" urt="0.067"
60 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.058 uct="0.001" uht="0.002" urt="0.058"
61 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.052 uct="0.002" uht="0.004" urt="0.052"
62 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.041 uct="0.000" uht="0.002" urt="0.041"
63 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.048 uct="0.000" uht="0.001" urt="0.048"
64 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.054 uct="0.000" uht="0.001" urt="0.054"
65 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.057 uct="0.000" uht="0.001" urt="0.058"
66 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.066 uct="0.000" uht="0.001" urt="0.066"
67 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.073 uct="0.000" uht="0.001" urt="0.072"
68 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.086 uct="0.000" uht="0.002" urt="0.086"
69 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.059 uct="0.001" uht="0.002" urt="0.060"
70 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.065 uct="0.002" uht="0.003" urt="0.065"
71 | 172.31.91.109 [05/Jun/2023:06:13:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.054 uct="0.001" uht="0.002" urt="0.054"
72 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.051 uct="0.001" uht="0.002" urt="0.050"
73 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.057 uct="0.000" uht="0.001" urt="0.056"
74 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.050 uct="0.000" uht="0.001" urt="0.050"
75 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.058 uct="0.000" uht="0.001" urt="0.058"
76 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.087 uct="0.000" uht="0.001" urt="0.088"
77 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.069 uct="0.001" uht="0.002" urt="0.069"
78 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.076 uct="0.001" uht="0.002" urt="0.076"
79 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.051 uct="0.001" uht="0.003" urt="0.051"
80 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.059 uct="0.000" uht="0.001" urt="0.059"
81 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.052 uct="0.000" uht="0.001" urt="0.052"
82 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.069 uct="0.000" uht="0.002" urt="0.069"
83 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.061 uct="0.000" uht="0.002" urt="0.061"
84 | 172.31.91.109 [05/Jun/2023:06:13:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.052 uct="0.000" uht="0.001" urt="0.053"
85 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.087 uct="0.002" uht="0.004" urt="0.087"
86 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.073 uct="0.000" uht="0.001" urt="0.072"
87 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.043 uct="0.000" uht="0.001" urt="0.043"
88 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.041 uct="0.000" uht="0.002" urt="0.041"
89 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.045 uct="0.000" uht="0.001" urt="0.044"
90 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.084 uct="0.000" uht="0.001" urt="0.084"
91 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.069 uct="0.001" uht="0.002" urt="0.069"
92 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.120 uct="0.000" uht="0.001" urt="0.120"
93 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.050 uct="0.000" uht="0.001" urt="0.050"
94 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.052 uct="0.000" uht="0.002" urt="0.052"
95 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.049 uct="0.000" uht="0.001" urt="0.050"
96 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.055 uct="0.000" uht="0.002" urt="0.055"
97 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.052 uct="0.000" uht="0.001" urt="0.053"
98 | 172.31.91.109 [05/Jun/2023:06:13:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.044 uct="0.001" uht="0.002" urt="0.044"
99 | 172.31.91.109 [05/Jun/2023:06:13:30 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.050 uct="0.000" uht="0.002" urt="0.051"
100 | 172.31.91.109 [05/Jun/2023:06:13:30 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.051 uct="0.000" uht="0.001" urt="0.052"
101 | 172.31.92.10 [05/Jun/2023:06:14:06 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=2147483648 rt=54.527 uct="0.001" uht="0.002" urt="54.528"
102 | 172.31.92.10 [05/Jun/2023:06:14:06 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=2147483648 rt=52.633 uct="0.000" uht="0.001" urt="52.632"
103 | 172.31.92.10 [05/Jun/2023:06:14:08 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=2147483648 rt=53.197 uct="0.001" uht="0.002" urt="53.198"
104 | 172.31.92.10 [05/Jun/2023:06:14:10 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=2147483648 rt=53.077 uct="0.000" uht="0.002" urt="53.077"
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-exp1/client-runtime.txt:
--------------------------------------------------------------------------------
1 | [2023-06-05 06:13:22,791] runtime=120
2 | [2023-06-05 06:13:22,913] runtime=82
3 | [2023-06-05 06:13:22,997] runtime=54
4 | [2023-06-05 06:13:23,054] runtime=61
5 | [2023-06-05 06:13:23,118] runtime=109
6 | [2023-06-05 06:13:23,229] runtime=58
7 | [2023-06-05 06:13:23,290] runtime=55
8 | [2023-06-05 06:13:23,347] runtime=79
9 | [2023-06-05 06:13:23,429] runtime=65
10 | [2023-06-05 06:13:23,497] runtime=53
11 | [2023-06-05 06:13:23,553] runtime=75
12 | [2023-06-05 06:13:23,631] runtime=54
13 | [2023-06-05 06:13:23,688] runtime=59
14 | [2023-06-05 06:13:23,750] runtime=70
15 | [2023-06-05 06:13:23,822] runtime=73
16 | [2023-06-05 06:13:23,898] runtime=83
17 | [2023-06-05 06:13:23,983] runtime=68
18 | [2023-06-05 06:13:24,054] runtime=82
19 | [2023-06-05 06:13:24,138] runtime=74
20 | [2023-06-05 06:13:24,215] runtime=57
21 | [2023-06-05 06:13:24,275] runtime=87
22 | [2023-06-05 06:13:24,365] runtime=62
23 | [2023-06-05 06:13:24,430] runtime=78
24 | [2023-06-05 06:13:24,511] runtime=57
25 | [2023-06-05 06:13:24,571] runtime=86
26 | [2023-06-05 06:13:24,660] runtime=75
27 | [2023-06-05 06:13:24,738] runtime=67
28 | [2023-06-05 06:13:24,808] runtime=65
29 | [2023-06-05 06:13:24,876] runtime=52
30 | [2023-06-05 06:13:24,930] runtime=64
31 | [2023-06-05 06:13:24,997] runtime=83
32 | [2023-06-05 06:13:25,083] runtime=137
33 | [2023-06-05 06:13:25,223] runtime=64
34 | [2023-06-05 06:13:25,290] runtime=66
35 | [2023-06-05 06:13:25,359] runtime=51
36 | [2023-06-05 06:13:25,413] runtime=52
37 | [2023-06-05 06:13:25,467] runtime=95
38 | [2023-06-05 06:13:25,565] runtime=60
39 | [2023-06-05 06:13:25,628] runtime=55
40 | [2023-06-05 06:13:25,686] runtime=66
41 | [2023-06-05 06:13:25,755] runtime=72
42 | [2023-06-05 06:13:25,830] runtime=78
43 | [2023-06-05 06:13:25,910] runtime=75
44 | [2023-06-05 06:13:25,988] runtime=51
45 | [2023-06-05 06:13:26,042] runtime=83
46 | [2023-06-05 06:13:26,127] runtime=55
47 | [2023-06-05 06:13:26,185] runtime=63
48 | [2023-06-05 06:13:26,250] runtime=74
49 | [2023-06-05 06:13:26,327] runtime=64
50 | [2023-06-05 06:13:26,394] runtime=69
51 | [2023-06-05 06:13:26,465] runtime=64
52 | [2023-06-05 06:13:26,531] runtime=87
53 | [2023-06-05 06:13:26,621] runtime=49
54 | [2023-06-05 06:13:26,673] runtime=71
55 | [2023-06-05 06:13:26,746] runtime=80
56 | [2023-06-05 06:13:26,829] runtime=52
57 | [2023-06-05 06:13:26,884] runtime=65
58 | [2023-06-05 06:13:26,952] runtime=64
59 | [2023-06-05 06:13:27,018] runtime=78
60 | [2023-06-05 06:13:27,099] runtime=69
61 | [2023-06-05 06:13:27,171] runtime=63
62 | [2023-06-05 06:13:27,237] runtime=53
63 | [2023-06-05 06:13:27,293] runtime=61
64 | [2023-06-05 06:13:27,357] runtime=64
65 | [2023-06-05 06:13:27,424] runtime=69
66 | [2023-06-05 06:13:27,496] runtime=75
67 | [2023-06-05 06:13:27,574] runtime=84
68 | [2023-06-05 06:13:27,660] runtime=97
69 | [2023-06-05 06:13:27,760] runtime=70
70 | [2023-06-05 06:13:27,833] runtime=75
71 | [2023-06-05 06:13:27,910] runtime=66
72 | [2023-06-05 06:13:27,978] runtime=62
73 | [2023-06-05 06:13:28,042] runtime=67
74 | [2023-06-05 06:13:28,111] runtime=61
75 | [2023-06-05 06:13:28,175] runtime=68
76 | [2023-06-05 06:13:28,246] runtime=97
77 | [2023-06-05 06:13:28,346] runtime=80
78 | [2023-06-05 06:13:28,429] runtime=86
79 | [2023-06-05 06:13:28,518] runtime=62
80 | [2023-06-05 06:13:28,583] runtime=69
81 | [2023-06-05 06:13:28,655] runtime=62
82 | [2023-06-05 06:13:28,720] runtime=79
83 | [2023-06-05 06:13:28,802] runtime=72
84 | [2023-06-05 06:13:28,877] runtime=62
85 | [2023-06-05 06:13:28,942] runtime=98
86 | [2023-06-05 06:13:29,042] runtime=83
87 | [2023-06-05 06:13:29,128] runtime=53
88 | [2023-06-05 06:13:29,184] runtime=51
89 | [2023-06-05 06:13:29,237] runtime=56
90 | [2023-06-05 06:13:29,296] runtime=93
91 | [2023-06-05 06:13:29,392] runtime=79
92 | [2023-06-05 06:13:29,473] runtime=131
93 | [2023-06-05 06:13:29,607] runtime=60
94 | [2023-06-05 06:13:29,670] runtime=62
95 | [2023-06-05 06:13:29,735] runtime=60
96 | [2023-06-05 06:13:29,798] runtime=65
97 | [2023-06-05 06:13:29,865] runtime=63
98 | [2023-06-05 06:13:29,931] runtime=53
99 | [2023-06-05 06:13:29,987] runtime=60
100 | [2023-06-05 06:13:30,050] runtime=61
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-exp1/nginx-case-client.pcap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/log-exp1/nginx-case-client.pcap
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-exp1/readme.md:
--------------------------------------------------------------------------------
1 | exp1 log
2 | - reuseport on
3 | - with recv buffer limit
4 | `sysctl -w net.ipv4.tcp_rmem="40960 40960 40960"`
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-exp2/access.log.txt:
--------------------------------------------------------------------------------
1 | 172.31.91.109 [05/Jun/2023:06:38:02 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.844 uct="0.362" uht="0.539" urt="0.845"
2 | 172.31.91.109 [05/Jun/2023:06:38:03 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.907 uct="0.334" uht="0.476" urt="0.906"
3 | 172.31.91.109 [05/Jun/2023:06:38:04 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=543900 rt=0.836 uct="0.319" uht="0.504" urt="0.836"
4 | 172.31.91.109 [05/Jun/2023:06:38:05 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.831 uct="0.161" uht="0.480" urt="0.830"
5 | 172.31.91.109 [05/Jun/2023:06:38:06 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=552849 rt=0.820 uct="0.180" uht="0.329" urt="0.819"
6 | 172.31.91.109 [05/Jun/2023:06:38:07 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.800 uct="0.122" uht="0.462" urt="0.800"
7 | 172.31.91.109 [05/Jun/2023:06:38:08 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=543900 rt=0.871 uct="0.251" uht="0.380" urt="0.871"
8 | 172.31.91.109 [05/Jun/2023:06:38:08 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.168 uct="0.003" uht="0.005" urt="0.168"
9 | 172.31.91.109 [05/Jun/2023:06:38:09 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.097 uct="0.003" uht="0.005" urt="0.096"
10 | 172.31.91.109 [05/Jun/2023:06:38:09 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.049 uct="0.000" uht="0.003" urt="0.049"
11 | 172.31.91.109 [05/Jun/2023:06:38:09 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.734 uct="0.002" uht="0.004" urt="0.215"
12 | 172.31.91.109 [05/Jun/2023:06:38:12 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=578939 rt=1.005 uct="0.289" uht="0.477" urt="1.006"
13 | 172.31.91.109 [05/Jun/2023:06:38:13 +0000] "GET /server.pcap HTTP/1.1" status=499 body_bytes_sent=0 rt=0.910 uct="-" uht="-" urt="0.910"
14 | 172.31.91.109 [05/Jun/2023:06:38:14 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.838 uct="0.338" uht="0.491" urt="0.838"
15 | 172.31.91.109 [05/Jun/2023:06:38:15 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=208691 rt=0.807 uct="0.329" uht="0.500" urt="0.807"
16 | 172.31.91.109 [05/Jun/2023:06:38:16 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=543900 rt=0.980 uct="0.340" uht="0.481" urt="0.981"
17 | 172.31.91.109 [05/Jun/2023:06:38:17 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.748 uct="0.272" uht="0.422" urt="0.749"
18 | 172.31.91.109 [05/Jun/2023:06:38:18 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=543900 rt=1.000 uct="0.308" uht="0.499" urt="0.999"
19 | 172.31.91.109 [05/Jun/2023:06:38:19 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=543900 rt=0.938 uct="0.321" uht="0.470" urt="0.939"
20 | 172.31.91.109 [05/Jun/2023:06:38:20 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=102195 rt=0.790 uct="0.322" uht="0.638" urt="0.790"
21 | 172.31.91.109 [05/Jun/2023:06:38:21 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.934 uct="0.327" uht="0.484" urt="0.934"
22 | 172.31.91.109 [05/Jun/2023:06:38:22 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=534951 rt=0.822 uct="0.161" uht="0.332" urt="0.822"
23 | 172.31.91.109 [05/Jun/2023:06:38:23 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=543900 rt=0.970 uct="0.278" uht="0.460" urt="0.970"
24 | 172.31.91.109 [05/Jun/2023:06:38:24 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=200499 rt=0.779 uct="0.139" uht="0.439" urt="0.779"
25 | 172.31.91.109 [05/Jun/2023:06:38:25 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=204595 rt=0.798 uct="0.340" uht="0.507" urt="0.797"
26 | 172.31.91.109 [05/Jun/2023:06:38:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3256204 rt=0.907 uct="0.157" uht="0.239" urt="0.548"
27 | 172.31.91.109 [05/Jun/2023:06:38:26 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.639 uct="0.060" uht="0.180" urt="0.429"
28 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.287 uct="0.003" uht="0.004" urt="0.287"
29 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.018 uct="0.001" uht="0.002" urt="0.011"
30 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.018 uct="0.001" uht="0.003" urt="0.018"
31 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.000" uht="0.001" urt="0.009"
32 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.027 uct="0.000" uht="0.002" urt="0.014"
33 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.012 uct="0.000" uht="0.001" urt="0.008"
34 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.012 uct="0.002" uht="0.003" urt="0.011"
35 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.062 uct="0.001" uht="0.002" urt="0.062"
36 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.002" uht="0.003" urt="0.009"
37 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.000" uht="0.001" urt="0.009"
38 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.023 uct="0.000" uht="0.001" urt="0.013"
39 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.011 uct="0.000" uht="0.002" urt="0.012"
40 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.014 uct="0.001" uht="0.002" urt="0.014"
41 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.000" uht="0.001" urt="0.009"
42 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.014 uct="0.002" uht="0.004" urt="0.013"
43 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.022 uct="0.001" uht="0.002" urt="0.021"
44 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.042 uct="0.002" uht="0.004" urt="0.043"
45 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.029 uct="0.001" uht="0.001" urt="0.030"
46 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.018 uct="0.002" uht="0.003" urt="0.018"
47 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.025 uct="0.000" uht="0.001" urt="0.024"
48 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.013 uct="0.002" uht="0.003" urt="0.012"
49 | 172.31.91.109 [05/Jun/2023:06:38:27 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.016 uct="0.001" uht="0.003" urt="0.016"
50 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.024 uct="0.000" uht="0.002" urt="0.009"
51 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.012 uct="0.001" uht="0.003" urt="0.012"
52 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.015 uct="0.001" uht="0.002" urt="0.013"
53 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.000" uht="0.001" urt="0.007"
54 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.008 uct="0.000" uht="0.001" urt="0.007"
55 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.016 uct="0.000" uht="0.001" urt="0.010"
56 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.017 uct="0.000" uht="0.001" urt="0.017"
57 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.014 uct="0.000" uht="0.001" urt="0.010"
58 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.013 uct="0.000" uht="0.001" urt="0.010"
59 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.001" uht="0.001" urt="0.010"
60 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.009 uct="0.001" uht="0.002" urt="0.009"
61 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.000" uht="0.001" urt="0.010"
62 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.018 uct="0.000" uht="0.001" urt="0.010"
63 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.020 uct="0.000" uht="0.003" urt="0.014"
64 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.011 uct="0.000" uht="0.001" urt="0.008"
65 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.015 uct="0.001" uht="0.002" urt="0.014"
66 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.010 uct="0.001" uht="0.001" urt="0.010"
67 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.014 uct="0.000" uht="0.001" urt="0.009"
68 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.012 uct="0.001" uht="0.002" urt="0.009"
69 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.036 uct="0.000" uht="0.001" urt="0.008"
70 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.040 uct="0.001" uht="0.002" urt="0.011"
71 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.039 uct="0.000" uht="0.001" urt="0.008"
72 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.046 uct="0.000" uht="0.001" urt="0.014"
73 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.041 uct="0.000" uht="0.001" urt="0.010"
74 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.034 uct="0.000" uht="0.001" urt="0.007"
75 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.041 uct="0.000" uht="0.001" urt="0.009"
76 | 172.31.91.109 [05/Jun/2023:06:38:28 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.040 uct="0.000" uht="0.001" urt="0.008"
77 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.039 uct="0.001" uht="0.002" urt="0.008"
78 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.046 uct="0.001" uht="0.002" urt="0.009"
79 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.032 uct="0.001" uht="0.002" urt="0.008"
80 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.039 uct="0.000" uht="0.001" urt="0.014"
81 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.050 uct="0.000" uht="0.001" urt="0.008"
82 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.032 uct="0.001" uht="0.001" urt="0.009"
83 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.040 uct="0.001" uht="0.002" urt="0.010"
84 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.048 uct="0.000" uht="0.001" urt="0.008"
85 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.040 uct="0.001" uht="0.002" urt="0.007"
86 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.039 uct="0.004" uht="0.006" urt="0.039"
87 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.038 uct="0.001" uht="0.002" urt="0.013"
88 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.038 uct="0.000" uht="0.002" urt="0.014"
89 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.039 uct="0.000" uht="0.001" urt="0.008"
90 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.050 uct="0.000" uht="0.001" urt="0.009"
91 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.038 uct="0.006" uht="0.007" urt="0.028"
92 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.037 uct="0.000" uht="0.001" urt="0.009"
93 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.045 uct="0.000" uht="0.001" urt="0.009"
94 | 172.31.91.109 [05/Jun/2023:06:38:29 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.037 uct="0.000" uht="0.001" urt="0.011"
95 | 172.31.91.109 [05/Jun/2023:06:38:30 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.041 uct="0.000" uht="0.001" urt="0.009"
96 | 172.31.91.109 [05/Jun/2023:06:38:30 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.052 uct="0.001" uht="0.001" urt="0.009"
97 | 172.31.91.109 [05/Jun/2023:06:38:30 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.029 uct="0.000" uht="0.001" urt="0.011"
98 | 172.31.91.109 [05/Jun/2023:06:38:30 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.049 uct="0.001" uht="0.001" urt="0.010"
99 | 172.31.91.109 [05/Jun/2023:06:38:30 +0000] "GET /server.pcap HTTP/1.1" status=200 body_bytes_sent=3602590 rt=0.036 uct="0.001" uht="0.002" urt="0.011"
100 | 172.31.92.10 [05/Jun/2023:06:38:43 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=2147483648 rt=49.653 uct="0.000" uht="0.005" urt="49.652"
101 | 172.31.92.10 [05/Jun/2023:06:38:49 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=2147483648 rt=52.123 uct="0.001" uht="0.002" urt="52.124"
102 | 172.31.92.10 [05/Jun/2023:06:38:49 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=2147483648 rt=54.741 uct="0.000" uht="0.002" urt="54.740"
103 | 172.31.92.10 [05/Jun/2023:06:38:50 +0000] "GET /0000000000000000.data HTTP/1.1" status=200 body_bytes_sent=2147483648 rt=53.601 uct="0.001" uht="0.002" urt="53.601"
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-exp2/client-runtime.txt:
--------------------------------------------------------------------------------
1 | [2023-06-05 06:38:01,630] runtime=1009
2 | [2023-06-05 06:38:02,641] runtime=1008
3 | [2023-06-05 06:38:03,651] runtime=1008
4 | [2023-06-05 06:38:04,662] runtime=1007
5 | [2023-06-05 06:38:05,672] runtime=1007
6 | [2023-06-05 06:38:06,682] runtime=1008
7 | [2023-06-05 06:38:07,692] runtime=1008
8 | [2023-06-05 06:38:08,703] runtime=220
9 | [2023-06-05 06:38:08,926] runtime=112
10 | [2023-06-05 06:38:09,040] runtime=60
11 | [2023-06-05 06:38:09,103] runtime=865
12 | [2023-06-05 06:38:09,970] runtime=1009
13 | [2023-06-05 06:38:10,982] runtime=1008
14 | [2023-06-05 06:38:11,992] runtime=1009
15 | [2023-06-05 06:38:13,004] runtime=1008
16 | [2023-06-05 06:38:14,014] runtime=1009
17 | [2023-06-05 06:38:15,025] runtime=1008
18 | [2023-06-05 06:38:16,036] runtime=1008
19 | [2023-06-05 06:38:17,047] runtime=1007
20 | [2023-06-05 06:38:18,057] runtime=1008
21 | [2023-06-05 06:38:19,067] runtime=1009
22 | [2023-06-05 06:38:20,079] runtime=1007
23 | [2023-06-05 06:38:21,089] runtime=1008
24 | [2023-06-05 06:38:22,100] runtime=1009
25 | [2023-06-05 06:38:23,112] runtime=1008
26 | [2023-06-05 06:38:24,123] runtime=1008
27 | [2023-06-05 06:38:25,134] runtime=1013
28 | [2023-06-05 06:38:26,150] runtime=783
29 | [2023-06-05 06:38:26,936] runtime=306
30 | [2023-06-05 06:38:27,245] runtime=30
31 | [2023-06-05 06:38:27,277] runtime=32
32 | [2023-06-05 06:38:27,312] runtime=21
33 | [2023-06-05 06:38:27,336] runtime=40
34 | [2023-06-05 06:38:27,379] runtime=23
35 | [2023-06-05 06:38:27,405] runtime=26
36 | [2023-06-05 06:38:27,433] runtime=72
37 | [2023-06-05 06:38:27,507] runtime=24
38 | [2023-06-05 06:38:27,533] runtime=21
39 | [2023-06-05 06:38:27,557] runtime=34
40 | [2023-06-05 06:38:27,593] runtime=23
41 | [2023-06-05 06:38:27,619] runtime=27
42 | [2023-06-05 06:38:27,649] runtime=21
43 | [2023-06-05 06:38:27,673] runtime=27
44 | [2023-06-05 06:38:27,703] runtime=34
45 | [2023-06-05 06:38:27,740] runtime=55
46 | [2023-06-05 06:38:27,798] runtime=41
47 | [2023-06-05 06:38:27,842] runtime=35
48 | [2023-06-05 06:38:27,880] runtime=35
49 | [2023-06-05 06:38:27,918] runtime=25
50 | [2023-06-05 06:38:27,946] runtime=27
51 | [2023-06-05 06:38:27,975] runtime=38
52 | [2023-06-05 06:38:28,015] runtime=27
53 | [2023-06-05 06:38:28,045] runtime=27
54 | [2023-06-05 06:38:28,074] runtime=20
55 | [2023-06-05 06:38:28,097] runtime=21
56 | [2023-06-05 06:38:28,120] runtime=28
57 | [2023-06-05 06:38:28,151] runtime=27
58 | [2023-06-05 06:38:28,181] runtime=24
59 | [2023-06-05 06:38:28,208] runtime=26
60 | [2023-06-05 06:38:28,237] runtime=21
61 | [2023-06-05 06:38:28,261] runtime=23
62 | [2023-06-05 06:38:28,286] runtime=22
63 | [2023-06-05 06:38:28,310] runtime=31
64 | [2023-06-05 06:38:28,344] runtime=36
65 | [2023-06-05 06:38:28,382] runtime=23
66 | [2023-06-05 06:38:28,408] runtime=27
67 | [2023-06-05 06:38:28,438] runtime=22
68 | [2023-06-05 06:38:28,462] runtime=26
69 | [2023-06-05 06:38:28,491] runtime=25
70 | [2023-06-05 06:38:28,518] runtime=49
71 | [2023-06-05 06:38:28,569] runtime=53
72 | [2023-06-05 06:38:28,625] runtime=52
73 | [2023-06-05 06:38:28,679] runtime=59
74 | [2023-06-05 06:38:28,741] runtime=54
75 | [2023-06-05 06:38:28,798] runtime=47
76 | [2023-06-05 06:38:28,848] runtime=53
77 | [2023-06-05 06:38:28,904] runtime=56
78 | [2023-06-05 06:38:28,963] runtime=52
79 | [2023-06-05 06:38:29,017] runtime=63
80 | [2023-06-05 06:38:29,082] runtime=50
81 | [2023-06-05 06:38:29,134] runtime=52
82 | [2023-06-05 06:38:29,189] runtime=61
83 | [2023-06-05 06:38:29,252] runtime=46
84 | [2023-06-05 06:38:29,300] runtime=54
85 | [2023-06-05 06:38:29,357] runtime=60
86 | [2023-06-05 06:38:29,420] runtime=55
87 | [2023-06-05 06:38:29,477] runtime=50
88 | [2023-06-05 06:38:29,530] runtime=53
89 | [2023-06-05 06:38:29,586] runtime=53
90 | [2023-06-05 06:38:29,641] runtime=51
91 | [2023-06-05 06:38:29,695] runtime=61
92 | [2023-06-05 06:38:29,759] runtime=53
93 | [2023-06-05 06:38:29,814] runtime=51
94 | [2023-06-05 06:38:29,868] runtime=57
95 | [2023-06-05 06:38:29,928] runtime=50
96 | [2023-06-05 06:38:29,980] runtime=55
97 | [2023-06-05 06:38:30,038] runtime=64
98 | [2023-06-05 06:38:30,105] runtime=40
99 | [2023-06-05 06:38:30,148] runtime=61
100 | [2023-06-05 06:38:30,211] runtime=47
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-exp2/nginx-case-client.pcap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/log-exp2/nginx-case-client.pcap
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/log-exp2/readme.md:
--------------------------------------------------------------------------------
1 | exp2 log
2 | - reuseport off
3 | - no recv buffer limit
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/nginx.conf:
--------------------------------------------------------------------------------
1 | # For more information on configuration, see:
2 | # * Official English Documentation: http://nginx.org/en/docs/
3 | # * Official Russian Documentation: http://nginx.org/ru/docs/
4 |
5 | user nginx;
6 | worker_processes 2;
7 | error_log /var/log/nginx/error.log notice;
8 | pid /run/nginx.pid;
9 |
10 | # Load dynamic modules. See /usr/share/doc/nginx/README.dynamic.
11 | include /usr/share/nginx/modules/*.conf;
12 |
13 | events {
14 | worker_connections 1024;
15 | }
16 |
17 | http {
18 | log_format main '$remote_addr [$time_local] "$request" '
19 | 'status=$status body_bytes_sent=$body_bytes_sent '
20 | 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"';
21 |
22 | access_log /var/log/nginx/access.log main;
23 |
24 | sendfile on;
25 | tcp_nopush on;
26 | keepalive_timeout 60;
27 | types_hash_max_size 4096;
28 |
29 | include /etc/nginx/mime.types;
30 | default_type application/octet-stream;
31 |
32 | # Load modular configuration files from the /etc/nginx/conf.d directory.
33 | # See http://nginx.org/en/docs/ngx_core_module.html#include
34 | # for more information.
35 | include /etc/nginx/conf.d/*.conf;
36 |
37 | server { # the only server block in this file: a reverse proxy listening on port 8000
38 | listen 8000 reuseport; # reuseport is the setting under test in this case study (the log-exp2 run was captured with it off)
39 | server_name server1;
40 | root /usr/share/nginx/html;
41 |
42 | # Load configuration files for the default server block.
43 | include /etc/nginx/default.d/*.conf;
44 |
45 | location / { # requests not matched by the exact-match locations below are proxied to the upstream
46 | proxy_pass http://172.31.86.252:8000;
47 | proxy_set_header Host $host; # forward the Host value of the incoming request
48 | proxy_set_header X-Real-IP $remote_addr; # hand the client address to the upstream
49 | }
50 |
51 | error_page 404 /404.html; # 404 responses are redirected internally to /404.html
52 | location = /404.html {
53 | }
54 |
55 | error_page 500 502 503 504 /50x.html; # same for the listed 5xx statuses
56 | location = /50x.html {
57 | }
58 | }
59 | }
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/reuseport-explained.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/Nginx reuseport 导致偶发性卡顿/reuseport-explained.jpg
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/script/get_big_file.sh:
--------------------------------------------------------------------------------
1 | begin_stamp=$(date +'%Y-%m-%d %H:%M:%S,%3N')  # wall-clock time just before the download, millisecond precision
2 | begin_ms=$(date --date="$begin_stamp" +%s%3N)  # the same instant expressed as epoch milliseconds
3 | curl --output ~/0000000000000000.data 172.31.81.55:8000/0000000000000000.data
4 | finish_stamp=$(date +'%Y-%m-%d %H:%M:%S,%3N')
5 | finish_ms=$(date --date="$finish_stamp" +%s%3N)
6 | echo "$((finish_ms - begin_ms))"  # elapsed time of the big-file download in milliseconds
--------------------------------------------------------------------------------
/performance/Nginx reuseport 导致偶发性卡顿/script/get_small_file.sh:
--------------------------------------------------------------------------------
1 | for i in {1..100}
2 | do
3 | starttime=`date +'%Y-%m-%d %H:%M:%S,%3N'`
4 | start_seconds=$(date --date="$starttime" +%s%3N);
5 | curl --max-time 1 -s 172.31.81.55:8000/server.pcap --output ~/aaa.pcap
6 | endtime=`date +'%Y-%m-%d %H:%M:%S,%3N'`
7 | end_seconds=$(date --date="$endtime" +%s%3N);
8 | echo [$starttime] runtime=$((end_seconds-start_seconds))
9 | done
--------------------------------------------------------------------------------
/performance/一次春节大促性能压测不达标的瓶颈推演.md:
--------------------------------------------------------------------------------
1 |
2 | # 一次春节大促性能压测不达标的瓶颈推演
3 |
4 | 本文示范了教科书式的在分布式应用场景下如何通过一个节点的状态来推演分析瓶颈出在上下游的哪个环节上。
5 |
6 | ## 场景描述
7 |
8 | 某客户通过PTS(一个打压力工具)来压选号业务(HTTP服务在9108端口上),一个HTTP请求对应一次select seq-id 和 一次insert
9 |
10 | PTS端看到RT900ms+,QPS大概5万(期望20万), 数据库代理服务 rt 5ms,QPS 10万+
11 |
12 | ### 链路:
13 |
14 | pts发起压力 -> 5个eip -> slb -> app(300个容器运行tomcat监听9108端口上) -> slb -> 数据库代理服务集群 -> RDS集群
15 |
16 | 性能不达标,怀疑数据库代理服务或者RDS性能不行,作为数据库需要自证清白,所以从RDS和数据库代理服务开始分析问题在哪里。
17 |
18 | 略过一系列在数据库代理服务、RDS上分析数据和监控图表都证明数据库代理服务和RDS没问题的过程。
19 |
20 | 在明确给出证据数据库代理服务和RDS都没问题后还是要解决问题,所以只能进一步帮助前面的app来分析为什么性能不达标。
21 |
22 | ## 在其中一个app应用上抓包(00:18秒到1:04秒),到数据库代理服务的一个连接分析:
23 |
24 | 
25 |
26 | 数据库代理服务每个HTTP请求的响应时间都控制在15ms(一个前端HTTP请求对应一个select seq-id,一个 select readonly, 一个insert, 这个响应时间符合预期)。一个连接每秒才收到20 tps(因为压力不够,压力加大的话这个单连接tps还可以增加), 20*3000 = 6万 , 跟压测看到基本一致
27 |
28 | 300个容器,每个容器 10个连接到数据库代理服务
29 |
30 | 如果300个容器上的并发压力不够的话就没法将3000个连接跑满,所以看到的QPS是5万。
31 |
32 | **从300个容器可以计算得到这个集群能支持的tps: 300*10(10个连接)* 1000/15(每秒钟每个连接能处理的请求数)=20万个tps (关键分析能力)**
33 |
34 | 也就是说,根据单个请求 15ms 的响应时间,我们计算可得整个后端的吞吐能力在20万QPS。所以目前问题不在后端,而是压力没有打到后端就出现瓶颈了。
35 |
36 | ## 9108的HTTP服务端口上的抓包分析
37 |
38 | 
39 |
40 | 9108服务的每个HTTP response差不多都是15ms(**这个响应时间基本符合预期**),一个HTTP连接上在45秒的抓包时间范围只收到23个HTTP Request。
41 |
42 | 或者下图:
43 |
44 |
45 |
46 |
47 |
48 | 统计9108端口在45秒总共收到的HTTP请求数量是6745(如下图),也就是每个app每秒钟收到的请求是150个,300*150=4.5万(理论值,300个app可能压力分布不一样?),**从这里看app收到的压力还不够**,所以压力还没有打到应用容器中的app,还在更前面
49 |
50 | 
51 |
52 | 后来从容器app监控也确认了这个响应时间和抓包看到的一致,所以从抓包分析http响应时间也基本得到15ms的rt关键结论
53 |
54 | 从wireshark IO Graphs 也能看到RT 和 QPS
55 |
56 | 
57 |
58 | ## 从应用容器上的netstat统计来看,也是压力端回复太慢
59 |
60 | 
61 |
62 | send-q表示回复从9108发走了,没收到对方的ack
63 |
64 | ## ARMS监控分析9108端口上的RT
65 |
66 | 后来PTS的同学说ARMS可以捞到监控数据,如下是按rt时间降序排列的结果:
67 |
68 | 
69 |
70 | 从上图中的rt平均时间,可以看到http的rt确实是14.4ms,表现非常平稳。从这个监控也发现实际app是330个而不是用户自己描述的300个,这也就是为什么实际tps是5万,但是按300个去算的话tps是4.5万(不要纠结客户为什么告诉你是300个容器而不是330个,有时候他们也搞不清楚,业务封装得太好了)
71 |
72 | 
73 |
74 | 5分钟时间,QPS是5万+,HTTP的平均rt是15ms, HTTP的最大rt才79ms,和前面抓包分析一致。
75 |
76 | ## 从后端分析的总结
77 |
78 | **从9108端口响应时间15ms来看是符合预期的,但PTS看到的RT却是900ms+,说明压力还没有打到APP上(也就是9108端口)**
79 |
80 | ## 结论
81 |
82 | 最后发现是 eip 带宽不足,只有200M,调整到1G后 tps 也翻了5倍到了25万。
83 |
84 | pts -> 5个eip(总带宽200M) -> slb -> app(330个HTTP容器) -> slb -> 数据库代理服务 -> RDS
85 |
86 | 这个案例有意思的地方是可以通过抓包就能分析出集群能扛的QPS20万(实际只有5万),那么可以把这个分析原则在每个角色上挨个分析一下,来看瓶颈出在了哪个环节。
87 |
88 | 压测端(PTS)看到的rt是900ms,从后端开始往前面的压测端来撸,看看每个环节的rt数据。
89 |
90 | ## 教训
91 |
92 | - 搞清楚 请求 从发起端到DB的链路路径,比如 pts -> 5个eip(总带宽200M) -> slb -> app(330个HTTP容器) -> slb -> 数据库代理服务 -> RDS
93 | - 压不上去得从发压力端开始往后端撸,撸每个产品的rt,每个产品给出自己的rt来自证清白
94 | - 应用有arms的话学会看arms对平均rt和QPS的统计,不要纠结个别请求的rt抖动,看平均rt
95 | - 通过抓包完全可以分析出来系统能扛多少并发,以及可能的瓶颈位置
96 |
97 | 一包在手 万事无忧
98 |
99 |
100 |
101 | Reference:
102 |
103 |
--------------------------------------------------------------------------------
/performance/一次春节大促性能压测不达标的瓶颈推演/0bd20d87d4cbae11-image-20220623003026351.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/一次春节大促性能压测不达标的瓶颈推演/0bd20d87d4cbae11-image-20220623003026351.png
--------------------------------------------------------------------------------
/performance/一次春节大促性能压测不达标的瓶颈推演/2f3b76be63d33151-2f3b76be63d331510eb6f2cecd91747f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/一次春节大促性能压测不达标的瓶颈推演/2f3b76be63d33151-2f3b76be63d331510eb6f2cecd91747f.png
--------------------------------------------------------------------------------
/performance/一次春节大促性能压测不达标的瓶颈推演/6a289d1bba1e875d-6a289d1bba1e875d215032b6fdc7b084.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/一次春节大促性能压测不达标的瓶颈推演/6a289d1bba1e875d-6a289d1bba1e875d215032b6fdc7b084.png
--------------------------------------------------------------------------------
/performance/一次春节大促性能压测不达标的瓶颈推演/80374e55936bc36b-80374e55936bc36bbd243f79fcdb5f8d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/一次春节大促性能压测不达标的瓶颈推演/80374e55936bc36b-80374e55936bc36bbd243f79fcdb5f8d.png
--------------------------------------------------------------------------------
/performance/一次春节大促性能压测不达标的瓶颈推演/938ce314d19b47cb-938ce314d19b47cba99e2a09c753f606.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/一次春节大促性能压测不达标的瓶颈推演/938ce314d19b47cb-938ce314d19b47cba99e2a09c753f606.png
--------------------------------------------------------------------------------
/performance/一次春节大促性能压测不达标的瓶颈推演/a479bad250c03aee-a479bad250c03aee41d58850afab9c14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/一次春节大促性能压测不达标的瓶颈推演/a479bad250c03aee-a479bad250c03aee41d58850afab9c14.png
--------------------------------------------------------------------------------
/performance/一次春节大促性能压测不达标的瓶颈推演/e239a12a1c361226-e239a12a1c3612263736256c8efc06e4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/performance/一次春节大促性能压测不达标的瓶颈推演/e239a12a1c361226-e239a12a1c3612263736256c8efc06e4.png
--------------------------------------------------------------------------------
/tcpdump/libmariadb 与 libmysqlclient 连接 AnalyticDB 时配置 local_infile 不同导致连接失败的问题.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/tcpdump/libmariadb 与 libmysqlclient 连接 AnalyticDB 时配置 local_infile 不同导致连接失败的问题.zip
--------------------------------------------------------------------------------
/tcpdump/syn_tw_reset.pcap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/tcpdump/syn_tw_reset.pcap
--------------------------------------------------------------------------------
/tcpdump/toa.lua:
--------------------------------------------------------------------------------
1 | local toa_protocol = Proto("toa","Parse TOA Protocol") -- protocol object; registered as a post-dissector at the end of this file
2 | local tcp_opts = Field.new("tcp.options") -- extractor for the tcp.options field of the packet being dissected
3 |
4 | local toa_type = ProtoField.uint16("toa.type", "toa type") -- receives the TCP option kind in the dissector
5 | local toa_client_ip = ProtoField.ipv4("toa.client_ip", "Client IP Address")
6 | local toa_client_port = ProtoField.uint16("toa.cport", "Client port")
7 | local toa_vip = ProtoField.ipv4("toa.vip", "LVS Address")
8 | local toa_vport = ProtoField.uint16("toa.vport", "LVS port")
9 | local toa_client_ipv6 = ProtoField.ipv6("toa.client_ipv6", "Client IPv6 Address") -- only filled in by parse_249
10 | local toa_vipv6 = ProtoField.ipv6("toa.vipv6", "VIPv6 Address") -- only filled in by parse_249
11 |
12 | toa_protocol.fields = { -- attach every ProtoField declared above to the protocol
13 | toa_type,
14 | toa_client_ip,
15 | toa_client_port,
16 | toa_vip,
17 | toa_vport,
18 | toa_client_ipv6,
19 | toa_vipv6
20 | }
21 |
22 | --- The parse_* functions below each decode one TOA option kind (252/254/253/250/249).
23 | --- TOA 252, TCPOLEN_VTOA 20: |opcode|size|cport+cip+vid+vip+vport+pad[2]| = 1+1+2+4+4+4+2+2 bytes
24 | function parse_252(v, subtree)
25 | local client_port = v:range(0, 2):uint() -- bytes 0-1
26 | local client_addr = v:range(2, 4):ipv4() -- bytes 2-5
27 | local lvs_addr = v:range(10, 4):ipv4() -- bytes 10-13; bytes 6-9 (vid in the layout above) are not decoded
28 | local lvs_port = v:range(14, 2):uint() -- bytes 14-15
29 | print("debug/252 client:", client_port, client_addr, lvs_addr, lvs_port)
30 |
31 | subtree:add(toa_client_ip, client_addr)
32 | subtree:add(toa_client_port, client_port)
33 | subtree:add(toa_vip, lvs_addr)
34 | subtree:add(toa_vport, lvs_port)
35 | end
36 |
37 | --- Example option bytes fe08cf882f5e575a: fe = kind 254, 08 = length 8, cf88 = client port
38 | --- TOA 254, TCPOLEN_VTOA 8: |opcode|size|cport+cip| = 1+1+2+4 bytes
39 | function parse_254(v, subtree)
40 | local port_bytes, addr_bytes = v:range(0, 2), v:range(2, 4)
41 | local client_port, client_addr = port_bytes:uint(), addr_bytes:ipv4()
42 | print("debug/254 client:", client_port, client_addr)
43 | subtree:add(toa_client_ip, client_addr)
44 | subtree:add(toa_client_port, client_port)
45 | end
46 |
47 | --- TOA 253 (smartnat), TCPOLEN_VTOA 8: |opcode|size|cport+cip| = 1+1+2+4 bytes
48 | function parse_253(v, subtree)
49 | local src_port = v:range(0, 2):uint() -- bytes 0-1: client port
50 | local src_addr = v:range(2, 4):ipv4() -- bytes 2-5: client IPv4 address
51 | print("debug/253 client:", src_port, src_addr)
52 | subtree:add(toa_client_ip, src_addr)
53 | subtree:add(toa_client_port, src_port)
54 | end
55 |
56 | function parse_250(v, subtree) -- TOA 250: 2-byte virtual (LVS) port followed by the 4-byte virtual IPv4 address
57 | local vport = v:range(0, 2):uint()
58 | local vip = v:range(2, 4):ipv4()
59 | print("debug/250 vip:", vport, vip) -- label used to say "client", but these are the LVS-side port and address
60 | subtree:add(toa_vip, vip)
61 | subtree:add(toa_vport, vport)
62 | end
63 |
64 | function parse_249(v, subtree) -- TOA 249: 16-byte virtual IPv6 address, then 16-byte client IPv6 address
65 | local vip6 = v:range(0, 16):ipv6()
66 | print("debug/249 vip ipv6:", vip6)
67 |
68 | local client6 = v:range(16, 16):ipv6()
69 | print("debug/249 client ipv6:", client6)
70 |
71 | subtree:add(toa_vipv6, vip6)
72 | subtree:add(toa_client_ipv6, client6)
73 | end
74 |
75 | function toa_protocol.dissector(buffer, pinfo, tree) -- post-dissector: walk the TCP options and add a subtree for TOA kinds 249-254
76 | local opts = tcp_opts()
77 | if (opts) then
78 | local len = opts.len
79 | local off = 0
80 | while (off < len)
81 | do
82 | local subtree = nil
83 | local kind = opts.range(off, 1):uint()
84 | if (kind == 1 or kind == 0) then -- kinds 0 and 1 are a single byte with no length field
85 | off = off + 1
86 | else
87 | -- Stop on a malformed option: no length byte left, a length below 2 (the range call
88 | -- below would be invalid or `off` would stop advancing), or a length running past the options.
89 | if (off + 1 >= len) then break end
90 | local toa_len = opts.range(off + 1, 1):uint()
91 | if (toa_len < 2 or off + toa_len > len) then break end
92 | if (toa_len ~= 2) then
93 | local v = opts.range(off + 2, toa_len - 2):tvb()
94 | print("toa info:", kind, toa_len, v)
95 | if (subtree == nil and kind>248 and kind<255 ) then
96 | subtree = tree:add(toa_protocol, string.format("TOA Protocol: %d", kind))
97 | -- toa_type is added so display filters can match it, then hidden because the kind is already in the subtree label (call subtree:set_hidden(true) to hide the whole subtree instead)
98 | local toa_type_item = subtree:add(toa_type, kind)
99 | toa_type_item:set_hidden(true)
100 | end
101 | if (kind == 254 and toa_len == 8) then
102 | parse_254(v, subtree)
103 | end
104 | if (kind == 253 and toa_len == 8) then
105 | parse_253(v, subtree)
106 | end
107 | if (kind == 252 and toa_len == 20) then
108 | parse_252(v, subtree)
109 | end
110 | if (kind == 250 and toa_len == 8) then
111 | parse_250(v, subtree)
112 | end
113 | if (kind == 249 and toa_len >= 40) then
114 | parse_249(v, subtree)
115 | end
116 | end
117 | off = off + toa_len
118 | end
119 | end
120 | end
121 | end
122 |
123 | register_postdissector(toa_protocol)
--------------------------------------------------------------------------------
/tcpdump/toa_pcap/tcp_options_252.pcap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/tcpdump/toa_pcap/tcp_options_252.pcap
--------------------------------------------------------------------------------
/tcpdump/toa_pcap/tcp_options_253.pcap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/tcpdump/toa_pcap/tcp_options_253.pcap
--------------------------------------------------------------------------------
/tcpdump/toa_pcap/tcp_options_254_250.pcap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/tcpdump/toa_pcap/tcp_options_254_250.pcap
--------------------------------------------------------------------------------
/zsxq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/zsxq.png
--------------------------------------------------------------------------------
/直播/案例星球20240127直播.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/直播/案例星球20240127直播.pdf
--------------------------------------------------------------------------------
/站外案例简析集锦/00e78e6190986e7b-image-20230314100052628.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/00e78e6190986e7b-image-20230314100052628.png
--------------------------------------------------------------------------------
/站外案例简析集锦/0e2a468150a155c8-image-20221125164254479.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/0e2a468150a155c8-image-20221125164254479.png
--------------------------------------------------------------------------------
/站外案例简析集锦/19cfbbd63e749cfe-image-20230314095512464.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/19cfbbd63e749cfe-image-20230314095512464.png
--------------------------------------------------------------------------------
/站外案例简析集锦/24e56165a9336a1b-image-20230220084137826.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/24e56165a9336a1b-image-20230220084137826.png
--------------------------------------------------------------------------------
/站外案例简析集锦/456dbe574617f911-image-20230220085701693.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/456dbe574617f911-image-20230220085701693.png
--------------------------------------------------------------------------------
/站外案例简析集锦/4e503055000530b5-image-20230220084714334.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/4e503055000530b5-image-20230220084714334.png
--------------------------------------------------------------------------------
/站外案例简析集锦/52864c4410f483d1-image-20221125164508492.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/52864c4410f483d1-image-20221125164508492.png
--------------------------------------------------------------------------------
/站外案例简析集锦/64497804a4243f0a-640-20221112211814567.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/64497804a4243f0a-640-20221112211814567.png
--------------------------------------------------------------------------------
/站外案例简析集锦/6c9333df801d32bd-640-7178470.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/6c9333df801d32bd-640-7178470.png
--------------------------------------------------------------------------------
/站外案例简析集锦/6d82a7d91b6d852d-image-20220924091158877.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/6d82a7d91b6d852d-image-20220924091158877.png
--------------------------------------------------------------------------------
/站外案例简析集锦/73776fa27bbcb6b2-image-20230314100259875.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/73776fa27bbcb6b2-image-20230314100259875.png
--------------------------------------------------------------------------------
/站外案例简析集锦/7eb291e891284313-640-20220707152145610.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/7eb291e891284313-640-20220707152145610.png
--------------------------------------------------------------------------------
/站外案例简析集锦/853f3166bd6a6741-image-20220924091212645.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/853f3166bd6a6741-image-20220924091212645.png
--------------------------------------------------------------------------------
/站外案例简析集锦/8580d8a8e71510e2-image-20230220083826110.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/8580d8a8e71510e2-image-20230220083826110.png
--------------------------------------------------------------------------------
/站外案例简析集锦/96a580f723de3e92-640-8259033.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/96a580f723de3e92-640-8259033.png
--------------------------------------------------------------------------------
/站外案例简析集锦/979f55f065bb45c9-640-8259052.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/979f55f065bb45c9-640-8259052.jpeg
--------------------------------------------------------------------------------
/站外案例简析集锦/c7b6e89872f2a73f-640-7083886.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/c7b6e89872f2a73f-640-7083886.jpeg
--------------------------------------------------------------------------------
/站外案例简析集锦/d5edcf4c833587de-image-20220707154232470.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/d5edcf4c833587de-image-20220707154232470.png
--------------------------------------------------------------------------------
/站外案例简析集锦/dbfa3765bf289d2c-image-20220706102446670.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/dbfa3765bf289d2c-image-20220706102446670.png
--------------------------------------------------------------------------------
/站外案例简析集锦/de0ccf287fc93eb7-image-20220707151642981.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/de0ccf287fc93eb7-image-20220707151642981.png
--------------------------------------------------------------------------------
/站外案例简析集锦/e8c9c2961a45aac4-image-20220706103130314.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/e8c9c2961a45aac4-image-20220706103130314.png
--------------------------------------------------------------------------------
/站外案例简析集锦/f1822dfadf71d254-image-20220707151656456.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/f1822dfadf71d254-image-20220707151656456.png
--------------------------------------------------------------------------------
/站外案例简析集锦/f26aabaa37d74fad-640-20221112211745593.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/f26aabaa37d74fad-640-20221112211745593.jpeg
--------------------------------------------------------------------------------
/站外案例简析集锦/fe102ae62b0fb350-image-20230314095644496.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/fe102ae62b0fb350-image-20230314095644496.png
--------------------------------------------------------------------------------
/站外案例简析集锦/fe98b4a0f5a9a664-image-20230314095237337.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/fe98b4a0f5a9a664-image-20230314095237337.png
--------------------------------------------------------------------------------
/站外案例简析集锦/ffd4e48233a458e9-image-20220706113722680.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantegg/programmer_case/743ff8562222406426b7f54624e4fed143670f3d/站外案例简析集锦/ffd4e48233a458e9-image-20220706113722680.png
--------------------------------------------------------------------------------