├── CONTRIBUTING ├── LICENSE ├── README.md ├── common.py ├── connection_info.py ├── doc ├── example.flow.png └── example.packet.png ├── modulo.py ├── modulo_test.py ├── packet_dumper.py ├── packet_info.py ├── plotter.py ├── rttcp.py └── trace_info.py /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | Chema Gonzalez (chema@google.com) 2 | 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [2015] Google Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rttcp: A Tool To Debug The Performance of a Network Link Using TCP 2 | 3 | Copyright 2017 Google Inc. 4 | 5 | This is not an official Google product. 6 | 7 | 8 | # Introduction 9 | 10 | rttcp is a tool to analyze the performance of a network link from a packet 11 | trace containing TCP traffic. 12 | 13 | The main use case is to provide some characterization of the performance 14 | of a link using a tcp performance test (speedtest). Assume you have a 15 | network whose performance you want to debug (e.g. it includes a radio 16 | link). You get a client to access a speedtest service (e.g. the 17 | [Google Fiber Speedtest](http://speedtest.googlefiber.net/)). This is 18 | very handy, as any device with a browser can do it. You run your HTML 19 | speedtest, and get some numbers. You also capture the trace at any 20 | point in the path between your device and the speedtest server.
21 | 22 | While measuring the performance of a connection by using tcp traffic 23 | presents some issues (it depends on the exact parameters of tcp, like 24 | congestion control, etc.), we can get an idea of how the link is 25 | behaving by checking how the packets go back and forth. 26 | 27 | 28 | # 1. Discussion 29 | 30 | rttcp provides 2 analysis modes, namely "flow" and "packet". 31 | 32 | 1. the "flow" mode provides a per-flow performance analysis. In 33 | particular, it provides 4 graphs: 34 | 35 | 1. per-flow goodput rate as a function of the timestamp of the first 36 | packet in the flow. This is an important statistic, as it shows 37 | how the different flows have performed in the experiment. On the 38 | other hand, the exact performance depends on multiple factors (the 39 | RTT time, the number of losses, etc.), which makes the signal noisy. 40 | 2. per-flow median packet RTT as a function of the timestamp of 41 | the first packet in the flow. This is the most interesting signal, 42 | as discussed in the [example analysis](./example.analysis.md). 43 | 3. per-flow total goodput as a function of the timestamp of the first 44 | packet in the flow. This is a description of the test setup, for 45 | validation purposes. 46 | 4. per-flow extra IP traffic as a function of the timestamp of the first 47 | packet in the flow. This is a rough validation that there have not 48 | been too many losses. 49 | 50 | 51 | 2. the "packet" mode provides an analysis of per-packet RTT data. We 52 | measure a TCP segment RTT as the difference between the timestamp of 53 | the TCP segment, and the timestamp of the first ACK packet that ACKs 54 | the highest SEQ number of the TCP segment. 55 | 56 | The idea is that the first ACK that acknowledges a full segment is 57 | the first instant where we can be sure that the segment has been 58 | received. We remove any duplicate data segments to avoid biasing the 59 | statistics with the latency of lost packets. 
60 | 61 | This mode, in fact, measures the time for (a) the (data) segment to 62 | reach the receiver, (b) the time for the receiver tcp stack to receive 63 | it and generate an ACK, and (c) the time for the (pure) ACK to reach 64 | the capturing point. This provides a nice measurement of the 65 | round-trip time performance of the download path from the capturing 66 | point. 67 | 68 | In particular, the "packet" mode provides 4 graphs (also known as 69 | "deltas"): 70 | 71 | 1. delta1: delta1 is the RTT of every TCP segment in the trace, as a 72 | function of the timestamp of the TCP segment. 73 | 2. delta2: delta2 is similar to delta1, but instead of using the ACK 74 | and SEQ numbers, it uses the TSval and TSecr of the 75 | [tcp timestamp option](http://www.ietf.org/rfc/rfc1323.txt). 76 | 3. delta3: delta3 tries to calculate the one-way latency of traffic 77 | by estimating the offset and HZ of the sender. 78 | 4. delta4: delta4 measures the inter-packet latency in tcp trains. 79 | It defines a train as a set of packets separated no more than a 80 | constant (we use 2 msec for this, as this is the `hystart_ack_delta` 81 | constant in `linux/net/ipv4/tcp_cubic.c`). 82 | 83 | 84 | ``` 85 | delta1/delta2 data segments 86 | /---------------------| - - - - - - - - - - - -\ 87 | | | ^ 88 | +---------|--+ +---------------+ +--|--------+ 89 | | client +| |------------| capture point |-------------| | server | 90 | +--------||--| +---------------+ +--|--------+ 91 | || | pure ACKs ^ 92 | \\---------------------| - - - - - - - - - - - -/ 93 | \-------------------->| - - - - - - - - - - - -/ 94 | delta3 95 | ``` 96 | 97 | Figure 1 shows a depiction of 3 of the 4 deltas in the forward case. 98 | delta1 and delta2 both measure the time for a segment to go from the 99 | capture point to the client and back. delta3 measures the time from 100 | the client to the capture point. 
101 | 102 | 103 | As the data segments can flow in both directions, we provide for 104 | separation of the forward and reverse paths, which are typically 105 | different. We also provide a boxplot of the distribution of the 106 | values, plus the basic statistics. 107 | 108 | 109 | # 2. Operation 110 | 111 | 0. `rttcp.py` is a vanilla python file. It uses 112 | [tshark](https://www.wireshark.org/docs/wsug_html_chunked/AppToolstshark.html) 113 | (a CLI version of wireshark) to print some selected fields in the 114 | packet trace. It then uses a combination of numpy, pandas, and matplotlib 115 | to analyze and graph the results. 116 | 117 | To get to know the options of the tool, run: 118 | 119 | ```shell 120 | $ ./rttcp.py help 121 | usage: rttcp.py [-h] [-d] [--quiet] [-v] [--tshark TSHARK] [-i INPUT-FILE] 122 | [-o OUTPUT-FILE] [--type ANALYSIS_TYPE] 123 | [--src-reverse SRC-REVERSE] 124 | {help,analyze,plot} ... 125 | 126 | rttcp flow aggregator. 127 | 128 | positional arguments: 129 | {help,analyze,plot} 130 | help show help screen 131 | analyze analyze pcap file 132 | plot plot analysis file 133 | 134 | optional arguments: 135 | -h, --help show this help message and exit 136 | -d, --debug Increase verbosity (use multiple times for more) 137 | --quiet Zero verbosity 138 | -v, --version show program's version number and exit 139 | --tshark TSHARK tshark binary 140 | -i INPUT-FILE, --input INPUT-FILE 141 | input file 142 | -o OUTPUT-FILE, --output OUTPUT-FILE 143 | output file 144 | --type ANALYSIS_TYPE set the analysis type (flow, packet) 145 | --src-reverse SRC-REVERSE 146 | any packet from a src definition (cidr) as reverse 147 | ``` 148 | 149 | 150 | 1. To run the "flow" analysis: 151 | 152 | ```shell 153 | $ ./rttcp.py analyze --type "flow" -i trace.pcap -o trace.pcap.flow.txt 154 | ... 
155 | $ ./rttcp.py plot --type "flow" -i trace.pcap.flow.txt --title "flow analysis, trace.fibertest.pcap" --src-reverse 192.168 -o trace.fibertest.pcap.flow.png 156 | # plotting /data/chema/proj/marconi-traces/loon/trace.fibertest.pcap.flow.txt 157 | $ ./rttcp.py plot --type "flow" -i trace.pcap.flow.txt --title "flow analysis, trace.fibertest.pcap" --src-reverse 192.168 -o trace.fibertest.pcap.flow.pdf 158 | ``` 159 | 160 | ![Figure 2](doc/example.flow.png) 161 | 162 | Figure 2 shows an example of "flow" analysis result. 163 | 164 | 165 | 2. To run the "packet" analysis: 166 | 167 | ```shell 168 | $ ./rttcp.py analyze --type "packet" -i trace.pcap -o trace.pcap.packet.txt 169 | ... 170 | $ ./rttcp.py plot --type "packet" -i trace.pcap.packet.txt --title "packet analysis, trace.fibertest.pcap" --src-reverse 192.168 -o trace.fibertest.pcap.packet.png 171 | ``` 172 | 173 | ![Figure 3](doc/example.packet.png) 174 | 175 | Figure 3 shows an example of "packet" analysis result. 176 | 177 | 178 | # 3. References 179 | 180 | * [tcptrace](http://www.tcptrace.org/): rttcp is very similar to tcptrace, 181 | but oriented to analyzing a trace composed of multiple connections, 182 | instead of performing per-connection analysis. 183 | 184 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright 2017 Google Inc. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.


"""Common code."""


__version__ = '0.0.1'

# Maximum value used for modular TCP sequence-number arithmetic (consumed by
# Modulo in modulo.py).
# NOTE(review): the TCP sequence-number space is 32 bits, so (1 << 32) - 1
# would be expected here; keeping the original value because the wraparound
# semantics depend on modulo.py, which is not visible in this chunk -- confirm.
TCP_SEQ_MAX_VALUE = (1 << 33) - 1


def endpoint_cmp(ip1, port1, ip2, port2):
  """Three-way comparison of two (ip, port) endpoints.

  Compares by IP address first, breaking ties with the port number.

  Args:
    ip1: first endpoint's IP address (string)
    port1: first endpoint's port
    ip2: second endpoint's IP address (string)
    port2: second endpoint's port

  Returns:
    A negative number, zero, or a positive number when (ip1, port1) sorts
    before, equal to, or after (ip2, port2), respectively.
  """
  if ip1 < ip2:
    return -1
  if ip1 > ip2:
    return 1
  # Python 2's cmp() spelled out explicitly, so this also works on
  # Python 3 (where the cmp() builtin no longer exists).
  return (port1 > port2) - (port1 < port2)


# http://stackoverflow.com/questions/1094841/
def binary_fmt(num, suffix='B'):
  """A binary (1024-based) pretty-printer (e.g. 2048 -> '2 KiB')."""
  if num == 0.0:
    return '0 %s' % suffix
  for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
    if abs(num) < 1024.0:
      return '%.3g %s%s' % (num, unit, suffix)
    num /= 1024.0
  return '%.3g %s%s' % (num, 'Yi', suffix)


def decimal_fmt(num, suffix='sec'):
  """A decimal (1000-based) pretty-printer.

  Uses sub-unity prefixes (milli, micro, ...) for |num| < 1 and
  super-unity prefixes (kilo, mega, ...) otherwise
  (e.g. 0.001 -> '1 msec', 1500 -> '1.5 Ksec').
  """
  if num == 0.0:
    return '0 %s' % suffix
  # use abs() so that large negative values (e.g. -2000) take the K/M/G
  # path below instead of being misformatted by the milli/micro path
  # (fixes the original `num < 1.0` test)
  if abs(num) < 1.0:
    for unit in ['', 'm', 'u', 'n', 'p', 'f', 'a', 'z']:
      if abs(num) >= 1.0:
        return '%.3g %s%s' % (num, unit, suffix)
      num *= 1000.0
    return '%.3g %s%s' % (num, 'y', suffix)
  for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
    if abs(num) < 1000.0:
      return '%.3g %s%s' % (num, unit, suffix)
    num /= 1000.0
  return '%.3g %s%s' % (num, 'Y', suffix)


# ----------------------------------------------------------------------------
# /connection_info.py:
#!/usr/bin/python

# Copyright 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Class containing info about a connection."""


import sys
import numpy as np

from common import endpoint_cmp
from common import TCP_SEQ_MAX_VALUE
from modulo import Modulo


class ConnectionInfo(object):
  """A class containing a summary about a 5-tuple connection.

  Packets are fed one at a time through process_packet(). In 'packet'
  mode, per-packet delta lines are written to the output file as they are
  produced; in 'flow' mode, per-connection aggregates are accumulated and
  emitted once by print_connection_info().

  The `packet` objects must expose the attributes accessed here
  (timestamp, ip_proto, ip_src, ip_dst, sport, dport, ip_len, tcp_len,
  tcp_seq, tcp_nxtseq, tcp_ack, tcp_flags_syn, tcp_tsval, tcp_tsecr) --
  presumably parsed from tshark output elsewhere in the project.
  """

  def __init__(self, analysis_type, connhash, f, debug):
    # analysis_type: 'flow' (per-connection summary) or 'packet'
    # (per-packet delta lines)
    self._analysis_type = analysis_type
    # connhash: opaque connection identifier, echoed in the flow output line
    self._connhash = connhash
    # f: writable file-like object where result lines are emitted
    self._f = f
    # debug: verbosity level (> 0 enables per-packet debug output)
    self._debug = debug
    self._ip_total_pkt = 0
    self._ip_total_bytes = 0
    # modular-arithmetic helper for wrapping TCP sequence numbers
    self._seq = Modulo(TCP_SEQ_MAX_VALUE)

  def endpoint(self, addr, port):
    # canonical 'addr:port' string used as a dictionary key for
    # per-direction state
    return '%s:%s' % (addr, port)

  @classmethod
  def header(cls, analysis_type):
    """Returns the output header line for the given analysis type.

    Note: returns None for any analysis_type other than 'flow'/'packet'.
    """
    if analysis_type == 'flow':
      return cls.flow_header()
    elif analysis_type == 'packet':
      return cls.packet_header()

  def process_packet(self, packet):
    """Main packet processing method."""
    self.common_process_packet(packet)
    self.flow_process_packet(packet)
    self.packet_process_packet(packet)
    # incremented last on purpose: the sub-processors above use
    # `self._ip_total_pkt == 0` to detect the connection's first packet
    # and lazily initialize their per-direction state
    self._ip_total_pkt += 1

  def common_process_packet(self, packet):
    """Per-packet processing common to both analysis modes."""
    # first packet of the connection
    if self._ip_total_pkt == 0:
      self._ip_proto = packet.ip_proto
      # sort the connection: store the two endpoints in canonical order so
      # the connection is described identically regardless of which
      # direction its first packet traveled
      if endpoint_cmp(packet.ip_src, packet.sport, packet.ip_dst,
                      packet.dport) <= 0:
        self._ip_src = packet.ip_src
        self._ip_dst = packet.ip_dst
        self._sport = packet.sport
        self._dport = packet.dport
      else:
        self._ip_src = packet.ip_dst
        self._ip_dst = packet.ip_src
        self._sport = packet.dport
        self._dport = packet.sport
      self._src = self.endpoint(self._ip_src, self._sport)
      self._dst = self.endpoint(self._ip_dst, self._dport)

  @classmethod
  def packet_header(cls):
    # header line for 'packet' mode output
    return '#%s %s %s %s %s %s' % (
        'type',
        'src',
        'dst',
        'timestamp',
        'delta',
        'other')

  def packet_process_packet(self, packet):
    """Process a packet for this connection (packet mode)."""
    # src/dst here are the actual direction of *this* packet (unlike the
    # canonically-ordered self._src/self._dst)
    src = self.endpoint(packet.ip_src, packet.sport)
    dst = self.endpoint(packet.ip_dst, packet.dport)
    # append new data segments
    if self._debug > 0:
      sys.stderr.write('%s %s %s %s %s %s\n' % (
          packet.timestamp, src, dst, packet.tcp_len, packet.tcp_nxtseq,
          packet.tcp_ack))
    self.packet_process_delta1(src, dst, packet)
    self.packet_process_delta2(src, dst, packet)
    self.packet_process_delta3(src, dst, packet)
    self.packet_process_delta4(src, dst, packet)

  def packet_process_delta1(self, src, dst, packet):
    """delta1: match data segments with the first ACK that acks them."""
    if self._ip_total_pkt == 0:
      # segments with data that have not been ACKed yet
      self._tcp_unacked_segments = {
          src: [],
          dst: [],
      }
      self._tcp_ack_highest = {
          src: None,
          dst: None,
      }
      self._delta1_list = {
          src: [],
          dst: [],
      }
    if packet.tcp_len > 0:
      # detect and delete duplicate data segments
      # (retransmissions would otherwise bias the RTT statistics with the
      # latency of lost packets)
      is_duplicate = any([(tcp_nxtseq == packet.tcp_nxtseq)
                          for (_, _, tcp_nxtseq)
                          in self._tcp_unacked_segments[src]])
      if is_duplicate:
        # remove all the duplicates
        new_list = []
        for l in self._tcp_unacked_segments[src]:
          _, _, tcp_nxtseq = l
          if tcp_nxtseq == packet.tcp_nxtseq:
            continue
          new_list += [l]
        self._tcp_unacked_segments[src] = new_list
      else:
        self._tcp_unacked_segments[src] += [[packet.timestamp, packet.tcp_len,
                                             packet.tcp_nxtseq]]
    # track the highest ACK seen from src: only a *new* high ACK can newly
    # acknowledge outstanding segments in the other direction
    new_ack_value = False
    if packet.tcp_ack is not None:
      if self._tcp_ack_highest[src] is None:
        self._tcp_ack_highest[src] = packet.tcp_ack
        new_ack_value = True
      else:
        if self._seq.cmp(self._tcp_ack_highest[src], packet.tcp_ack) < 0:
          new_ack_value = True
          self._tcp_ack_highest[src] = packet.tcp_ack
    if not new_ack_value:
      return
    # check for already-acked data
    new_list = []
    for l in self._tcp_unacked_segments[dst]:
      timestamp, _, tcp_nxtseq = l
      if self._seq.cmp(tcp_nxtseq, self._tcp_ack_highest[src]) <= 0:
        # segment has been acked
        delta1 = packet.timestamp - timestamp
        if delta1 > 1.0:
          # debug-only warning: suspiciously large RTT samples are
          # reported but still recorded below
          if self._debug > 0:
            print 'delta1: should remove [%f, %s, %s]' % (
                timestamp, _, tcp_nxtseq)
        if self._analysis_type == 'flow':
          self._delta1_list[src] += [delta1]
        elif self._analysis_type == 'packet':
          # emit delta1 line
          # (note that we are reversing src and dst as the information
          # we have right now refers to the ACK, which goes in the reverse
          # direction than the segment we care about)
          self._f.write('%s %f %s %s %f -\n' % ('delta1', timestamp,
                                                dst, src, delta1))
      else:
        # not yet acked: keep waiting
        new_list += [l]
    self._tcp_unacked_segments[dst] = new_list

  def packet_process_delta2(self, src, dst, packet):
    """delta2: match segments with the first TSecr that "acks" its TSval."""
    if self._ip_total_pkt == 0:
      # segments with tsval that have not been "ACKed" by a tsecr yet
      self._tcp_untsecred_segments = {
          src: [],
          dst: [],
      }
      self._tcp_tsecr_highest = {
          src: None,
          dst: None,
      }
    # only packets carrying the TCP timestamp option can participate
    if packet.tcp_tsval is None or packet.tcp_tsecr is None:
      return
    # we can only assume cause-effect on pure ACKs
    if packet.tcp_len > 0:
      self._tcp_untsecred_segments[src] += [[
          packet.timestamp, packet.tcp_tsval]]
      # TODO(chema): detect and delete duplicate data segments
    # track the highest TSecr seen from src (same mechanism as delta1's
    # highest-ACK tracking)
    new_tsecr_value = False
    if packet.tcp_tsecr is not None:
      if self._tcp_tsecr_highest[src] is None:
        self._tcp_tsecr_highest[src] = packet.tcp_tsecr
        new_tsecr_value = True
      else:
        if self._tcp_tsecr_highest[src] < packet.tcp_tsecr:
          new_tsecr_value = True
          self._tcp_tsecr_highest[src] = packet.tcp_tsecr
    if not new_tsecr_value:
      return
    # check for already-tsecr'ed segments
    new_list = []
    for l in self._tcp_untsecred_segments[dst]:
      timestamp, tcp_tsval = l
      if tcp_tsval <= self._tcp_tsecr_highest[src]:
        # tsval has been tsecr'ed
        delta2 = packet.timestamp - timestamp
        if delta2 > 1.0:
          # debug warning only; the sample is still emitted below
          print 'delta2: should remove [%f, %s]' % (timestamp, tcp_tsval)
        if self._analysis_type == 'packet':
          # emit delta2 line
          # (note that we are reversing src and dst as the information
          # we have right now refers to the TSecr, which goes in the reverse
          # direction than the segment we care about)
          self._f.write('%s %f %s %s %f -\n' % ('delta2', timestamp,
                                                dst, src, delta2))
      else:
        # not yet echoed: keep waiting
        new_list += [l]
    self._tcp_untsecred_segments[dst] = new_list

  # common kernel timestamp-clock frequencies an estimate is snapped to
  POPULAR_HZ_VALUES = [100., 200., 250., 1000.]

  def estimate_hz(self, packet, src):
    """Estimate the HZ of a host by comparing the ts and TSval of 2 packets.

    Returns the closest value in POPULAR_HZ_VALUES, or -1 when the
    estimate is more than 5% away from every popular value.
    """
    ref_timestamp, ref_tcp_tsval = self._reference_tcp_tsval[src]
    estimated_hz = ((packet.tcp_tsval - ref_tcp_tsval) /
                    (packet.timestamp - ref_timestamp))
    # round the estimated HZ to a popular value
    error_l = [abs((estimated_hz - hz) / hz) for hz in self.POPULAR_HZ_VALUES]
    pos = error_l.index(min(error_l))
    if min(error_l) > 0.05:
      # invalid HZ
      print 'error: unexpected estimated HZ (src: %s, %f = %f + %.2f%%)' % (
          src, estimated_hz, self.POPULAR_HZ_VALUES[pos], 100 * min(error_l))
      return -1
    return self.POPULAR_HZ_VALUES[pos]

  def packet_process_delta3(self, src, dst, packet):
    """delta3: estimate the sender's delay variance from the TSval."""
    if self._ip_total_pkt == 0:
      self._reference_tcp_tsval = {
          src: None,
          dst: None,
      }
      self._estimated_hz = {
          src: None,
          dst: None,
      }
    if packet.tcp_tsval is None or packet.tcp_tsecr is None:
      return
    # first timestamped packet in this direction becomes the reference
    # point against which later TSvals are compared
    if self._reference_tcp_tsval[src] is None:
      self._reference_tcp_tsval[src] = [packet.timestamp, packet.tcp_tsval]
      return
    ref_timestamp, ref_tcp_tsval = self._reference_tcp_tsval[src]
    if self._estimated_hz[src] is None:
      self._estimated_hz[src] = self.estimate_hz(packet, src)
    if self._estimated_hz[src] == -1:
      # HZ estimation failed for this direction; delta3 cannot be computed
      return
    # where the packet "should" have been captured if the one-way delay
    # had stayed the same as for the reference packet
    expected_timestamp = ref_timestamp + ((packet.tcp_tsval - ref_tcp_tsval) /
                                          self._estimated_hz[src])
    delta3 = packet.timestamp - expected_timestamp
    if self._analysis_type == 'packet':
      # emit delta3 line
      if delta3 > 1.0:
        print 'delta3: should remove [%f, %s]' % (packet.timestamp, delta3)
      self._f.write('%s %f %s %s %f -\n' % ('delta3', packet.timestamp,
                                            src, dst, delta3))

  def packet_process_delta4(self, src, dst, packet):
    """delta4: match consecutive segments from the same src."""
    if self._ip_total_pkt == 0:
      # last packet timestamp per direction, tracked separately for pure
      # ACKs and for data segments
      self._last_timestamp_from = {
          'ack': {
              src: None,
              dst: None,
          },
          'data': {
              src: None,
              dst: None,
          },
      }
    # zero-length TCP payload means a pure ACK
    traffic = 'ack' if packet.tcp_len == 0 else 'data'
    if self._last_timestamp_from[traffic][src] is not None:
      delta4 = packet.timestamp - self._last_timestamp_from[traffic][src]
      if self._analysis_type == 'packet':
        # emit delta4 line
        self._f.write('%s %f %s %s %f %s\n' % ('delta4', packet.timestamp,
                                               src, dst, delta4, traffic))
    self._last_timestamp_from[traffic][src] = packet.timestamp

  @classmethod
  def flow_header(cls):
    # header line for 'flow' mode output; fields match the write in
    # print_connection_info()
    return '#%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s' % (
        'connhash',
        'first_ts',
        'last_ts',
        'ip_proto',
        'tcp_seq_syn[src]',
        'tcp_seq_syn[dst]',
        'ip_total_pkt',
        'ip_total_bytes',
        'pps',
        'ip_bitrate',
        'tcp_bytes',
        'tcp_goodput_bytes',
        'tcp_goodput_bitrate',
        'delta1_small_mean',
        'delta1_small_median',
        'delta1_large_mean',
        'delta1_large_median')

  def flow_process_packet(self, packet):
    """Process a packet for this connection (flow mode)."""
    # first packet of the connection
    src = self.endpoint(packet.ip_src, packet.sport)
    dst = self.endpoint(packet.ip_dst, packet.dport)
    if self._ip_total_pkt == 0:
      self._first_ts = packet.timestamp
      # init connection values
      self._tcp_seq_syn = {
          src: None,
          dst: None,
      }
      self._tcp_seq_first = {
          src: None,
          dst: None,
      }
      self._tcp_seq_last = {
          src: None,
          dst: None,
      }
      self._tcp_total_bytes = {
          src: 0,
          dst: 0,
      }
    # SYN packet
    if packet.tcp_flags_syn:
      self._tcp_seq_syn[src] = packet.tcp_seq
    # any packet: manage time
    self._last_ts = packet.timestamp
    # any packet: manage bytes
    self._ip_total_bytes += packet.ip_len
    self._tcp_total_bytes[src] += packet.tcp_len
    if self._tcp_seq_first[src] is None:
      self._tcp_seq_first[src] = packet.tcp_seq
    # pure ACKs carry no nxtseq; fall back to the packet's own seq
    nxtseq = (packet.tcp_nxtseq if packet.tcp_nxtseq is not None
              else packet.tcp_seq)
    if self._tcp_seq_last[src] is None:
      self._tcp_seq_last[src] = nxtseq
    else:
      # modular max: tolerates sequence-number wraparound
      self._tcp_seq_last[src] = self._seq.max(self._tcp_seq_last[src], nxtseq)

  def print_connection_info(self):
    """Prints information about a full connection (flow mode)."""
    if self._analysis_type == 'packet':
      return
    pps = '-'
    ip_bitrate = '-'
    tcp_bytes = '-'
    tcp_goodput_bitrate = '-'
    tcp_goodput_bytes = '-'
    # NOTE(review): when first_ts == last_ts (e.g. a single-packet flow)
    # the '-' placeholders above are kept, but they are then formatted with
    # %f/%i in the write below, which raises TypeError -- confirm such
    # flows cannot reach this point. Similarly, np.median/np.mean on an
    # empty _delta1_list yields nan.
    if self._first_ts != self._last_ts:
      pps = self._ip_total_pkt / (self._last_ts - self._first_ts)
      ip_bitrate = (8. * self._ip_total_bytes /
                    (self._last_ts - self._first_ts))
      tcp_bytes = (self._tcp_total_bytes[self._src] +
                   self._tcp_total_bytes[self._dst])
      # goodput: distance covered by the sequence numbers in each
      # direction (modular diff handles wraparound)
      tcp_goodput_bytes = 0
      tcp_goodput_bytes += self._seq.diff(self._tcp_seq_last[self._src],
                                          self._tcp_seq_first[self._src])
      tcp_goodput_bytes += self._seq.diff(self._tcp_seq_last[self._dst],
                                          self._tcp_seq_first[self._dst])
      tcp_goodput_bitrate = (8. * tcp_goodput_bytes /
                             (self._last_ts - self._first_ts))
    # report the direction with the smaller median delta1 as "small" and
    # the other as "large" (typically the two path directions differ)
    if (np.median(self._delta1_list[self._src]) <
        np.median(self._delta1_list[self._dst])):
      small_median = np.median(self._delta1_list[self._src])
      small_mean = np.mean(self._delta1_list[self._src])
      large_median = np.median(self._delta1_list[self._dst])
      large_mean = np.mean(self._delta1_list[self._dst])
    else:
      small_median = np.median(self._delta1_list[self._dst])
      small_mean = np.mean(self._delta1_list[self._dst])
      large_median = np.median(self._delta1_list[self._src])
      large_mean = np.mean(self._delta1_list[self._src])
    self._f.write('%s %f %f %s %s %s %i %i %f %f %i %i %f %f %f %f %f\n' % (
        self._connhash, self._first_ts, self._last_ts,
        self._ip_proto,
        self._tcp_seq_syn[self._src], self._tcp_seq_syn[self._dst],
        self._ip_total_pkt, self._ip_total_bytes,
        pps, ip_bitrate, tcp_bytes,
        tcp_goodput_bytes, tcp_goodput_bitrate,
        small_mean, small_median, large_mean, large_median))

# ----------------------------------------------------------------------------
# /doc/example.flow.png:
# https://raw.githubusercontent.com/google/rttcp/fc2f0af6814def37c949719665afc7cef8ced124/doc/example.flow.png
# /doc/example.packet.png:
# https://raw.githubusercontent.com/google/rttcp/fc2f0af6814def37c949719665afc7cef8ced124/doc/example.packet.png
# ----------------------------------------------------------------------------
# /modulo.py:
#!/usr/bin/python

# Copyright 2017 Google Inc. All rights reserved.
"""Modulo number operations."""


class Modulo(object):
  """A class providing modulo math operations.

  Operations work on integers in [0..max_value] that wrap around at
  max_value + 1 (e.g. TCP sequence numbers). A dedicated sentinel value
  (`invalid`, -1 by default) is propagated through the arithmetic ops.
  """

  def __init__(self, max_value, invalid=-1):
    """Initializes the modulo context.

    Args:
      max_value: largest representable value (values wrap at max_value + 1).
      invalid: sentinel marking an invalid value (default -1).
    """
    self._max = max_value
    # half the range: the threshold used to decide wrap-around direction
    self._half_max = self._max >> 1
    self._invalid = invalid

  def wrap_correction(self, x):
    """Returns x in the range [0..self._max]."""
    # double modulo so negative x also maps into [0..self._max]
    return ((x % (self._max + 1)) + (self._max + 1)) % (self._max + 1)

  def add(self, x, y):
    """Returns (x+y) in the range [0..self._max]."""
    if x == self._invalid or y == self._invalid:
      return self._invalid
    return self.wrap_correction(x + y)

  def diff(self, x, y):
    """Returns (x-y) in the range [0..self._max].

    NOTE(review): the original docstring advertised the signed range
    [-half_max..half_max], which actually describes sub(); diff() always
    wraps into [0..self._max] (confirmed by modulo_test.testDiff).
    """
    if x == self._invalid or y == self._invalid:
      return self._invalid
    return self.wrap_correction(x - y)

  def sub(self, x, y):
    """Returns (x-y) in the range [-self._half_max..self._half_max].

    NOTE(review): the original docstring advertised [0..self._max], which
    actually describes diff(); sub() returns a signed distance (confirmed
    by modulo_test.testSub).
    """
    if x == self._invalid or y == self._invalid:
      return self._invalid
    wrapped_diff = self.wrap_correction(x - y)
    if wrapped_diff > ((self._max + 1) >> 1):
      # closer going backwards: report a negative distance
      return wrapped_diff - (self._max + 1)
    return wrapped_diff

  def cmp(self, x, y):
    """Compares 2 values.

    Args:
      x: first value
      y: second value

    Returns:
      an integer less than, equal to, or greater than zero if x is
      found, respectively, to be less than, to match, or be greater than y.
    """
    diff = self.wrap_correction(y - x)
    if diff == 0:
      # y - x == 0
      return 0
    elif diff > ((self._max + 1) >> 1):
      # y - x < 0
      return 1
    else:
      # y - x > 0
      return -1

  def cmp_range_closed(self, x, y1, y2):
    """Compares a value and a range [y1, y2].

    Args:
      x: value to compare
      y1: start of the range
      y2: end of the range

    Returns:
      an integer less than, equal to, or greater than zero if x is found,
      respectively, to be less than y1, in [y1, y2], or greater than y2.
    """
    if self.cmp(x, y1) < 0:
      # x < y1
      return -1
    elif self.cmp(x, y1) >= 0 and self.cmp(x, y2) <= 0:
      # y1 <= x <= y2
      return 0
    else:
      # y2 < x
      return 1

  def cmp_range_closed_open(self, x, y1, y2):
    """Compares a value and a range [y1, y2).

    Args:
      x: value to compare
      y1: start of the range
      y2: end of the range

    Returns:
      an integer less than, equal to, or greater than zero if x is found,
      respectively, to be less than y1, in [y1, y2), or greater or equal
      to y2.
    """
    if self.cmp(x, y1) < 0:
      # x < y1
      return -1
    elif self.cmp(x, y1) >= 0 and self.cmp(x, y2) < 0:
      # y1 <= x < y2
      return 0
    else:
      # y2 <= x
      return 1

  def range_overlap(self, x1, x2, y1, y2):
    """Whether the ranges ([x1, x2] and [y1, y2]) overlap at all."""
    if self.cmp(y2, x1) < 0 or self.cmp(y1, x2) > 0:
      return False
    return True

  def max(self, x, y):
    """Returns the greatest of 2 values (invalid values lose)."""
    if x == self._invalid:
      return y
    if y == self._invalid:
      return x
    if self.cmp(x, y) < 0:
      return y
    return x

  def map_into_same_timeline(self, x, ref_value):
    """Map a value on the same timeline than a reference one.

    Map a value x into the same time line as a reference value in order
    to compare them easily.
    A timeline is essentially one "run" from 0 to self._max. If two
    values are separated by a wrap-around point, they are in two
    different timelines and cannot be compared directly.
    Note that the values cannot be apart by more self._half_max or else
    it is not possible to correctly map them (aliasing effect).

    Args:
      x: value to map
      ref_value: reference value

    Returns:
      the mapped value. In most cases (both are on the same
      timeline) it will be the same as the input value, but in the wrapped
      case, the mapped value may be negative or larger than self._max.
    """
    # The two values have a wrapping point between them if they are more
    # than self._half_max apart.
    if x > ref_value + self._half_max:
      # target -> wrap-point -> ref
      return x - (self._max + 1)
    elif ref_value > x + self._half_max:
      # ref -> wrap-point -> target
      return x + (self._max + 1)
    return x
"""Unit tests for modulo.py."""

import unittest
from modulo import Modulo


class ModuloTest(unittest.TestCase):
  # Unit tests for the Modulo wrap-around arithmetic class.
  # The tests use a 33-bit range (the space of a 32-bit TCP sequence
  # number plus one wrap bit) and -1 as the invalid-value sentinel.

  MAX_VALUE = (1 << 33) - 1
  HALF_MAX_VALUE = MAX_VALUE >> 1
  INVALID_VALUE = -1

  def testWrapCorrection(self):
    """A test of the wrap_correction() method."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    self.assertEqual(0, m.wrap_correction(0))
    self.assertEqual(90000, m.wrap_correction(90000))
    self.assertEqual(0, m.wrap_correction(self.MAX_VALUE + 1))
    self.assertEqual(90000, m.wrap_correction(self.MAX_VALUE + 1 + 90000))
    self.assertEqual(self.MAX_VALUE, m.wrap_correction(self.INVALID_VALUE))
    self.assertEqual(self.MAX_VALUE - 1, m.wrap_correction(-2))
    # values more than one full range away still wrap correctly
    self.assertEqual(0, m.wrap_correction(2 * (self.MAX_VALUE + 1) + 0))
    self.assertEqual(1, m.wrap_correction(3 * (self.MAX_VALUE + 1) + 1))
    self.assertEqual(0, m.wrap_correction(-self.MAX_VALUE - 1))
    self.assertEqual(0, m.wrap_correction(-(2 * (self.MAX_VALUE + 1)) + 0))
    self.assertEqual(1, m.wrap_correction(-(3 * (self.MAX_VALUE + 1)) + 1))

  def testCmp(self):
    """Basic comparisons, including the half-range decision boundary."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    self.assertEqual(0, m.cmp(0, 0))
    self.assertEqual(0, m.cmp(0, self.MAX_VALUE + 1))
    self.assertEqual(0, m.cmp(90000, self.MAX_VALUE + 1 + 90000))
    self.assertEqual(-1, m.cmp(0, 1))
    self.assertEqual(1, m.cmp(1, 0))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE, 0))
    self.assertEqual(1, m.cmp(0, self.MAX_VALUE))
    # exactly half the range away counts as "smaller"
    self.assertEqual(-1, m.cmp(0, (self.MAX_VALUE + 1) >> 1))
    self.assertEqual(1, m.cmp(0, ((self.MAX_VALUE + 1) >> 1) + 1))

  def testCmpRangeClosed(self):
    """Compares values against the closed range [y1, y2]."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    self.assertEqual(-1, m.cmp_range_closed(89999, 90000, 91000))
    self.assertEqual(0, m.cmp_range_closed(90000, 90000, 91000))
    self.assertEqual(0, m.cmp_range_closed(90001, 90000, 91000))
    self.assertEqual(0, m.cmp_range_closed(90999, 90000, 91000))
    self.assertEqual(0, m.cmp_range_closed(91000, 90000, 91000))
    self.assertEqual(1, m.cmp_range_closed(91001, 90000, 91000))
    self.assertEqual(-1, m.cmp_range_closed(self.MAX_VALUE, 0, 1))
    self.assertEqual(0, m.cmp_range_closed(0, 0, 1))
    self.assertEqual(0, m.cmp_range_closed(1, 0, 1))
    self.assertEqual(1, m.cmp_range_closed(2, 0, 1))
    self.assertEqual(1, m.cmp_range_closed(((self.MAX_VALUE + 1) >> 1) - 1,
                                           0, 1))
    self.assertEqual(-1, m.cmp_range_closed((self.MAX_VALUE + 1) >> 1, 0, 1))

  def testCmpRangeClosedOpen(self):
    """Compares values against the half-open range [y1, y2)."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    self.assertEqual(-1, m.cmp_range_closed_open(89999, 90000, 91000))
    self.assertEqual(0, m.cmp_range_closed_open(90000, 90000, 91000))
    self.assertEqual(0, m.cmp_range_closed_open(90001, 90000, 91000))
    self.assertEqual(0, m.cmp_range_closed_open(90999, 90000, 91000))
    self.assertEqual(1, m.cmp_range_closed_open(91000, 90000, 91000))
    self.assertEqual(1, m.cmp_range_closed_open(91001, 90000, 91000))
    self.assertEqual(-1, m.cmp_range_closed_open(self.MAX_VALUE, 0, 1))
    self.assertEqual(0, m.cmp_range_closed_open(0, 0, 1))
    self.assertEqual(1, m.cmp_range_closed_open(1, 0, 1))
    self.assertEqual(1, m.cmp_range_closed_open(2, 0, 1))
    self.assertEqual(1, m.cmp_range_closed_open(
        ((self.MAX_VALUE + 1) >> 1) - 1, 0, 1))
    self.assertEqual(-1, m.cmp_range_closed_open((self.MAX_VALUE + 1) >> 1,
                                                 0, 1))

  def testRangeOverlap(self):
    """Checks overlap detection for the fixed range [1000, 2000]."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    y1 = 1000
    y2 = 2000
    test_arr = [
        # [x1, x2] covers [y1, y2]
        [0, 4000, True],
        [1000, 2000, True],
        [999, 2000, True],
        [1000, 2001, True],
        [999, 2001, True],
        # [x1, x2] is covered by [y1, y2]
        [1001, 1999, True],
        [1500, 1501, True],
        # y1 is overlapped
        [900, 1500, True],
        # y2 is overlapped
        [1500, 2100, True],
        # non overlap
        [900, 999, False],
        [2001, 2100, False],
    ]
    for item in test_arr:
      x1, x2, expected_overlap = item
      self.assertEqual(expected_overlap, m.range_overlap(x1, x2, y1, y2))
      # overlap is a commutative operation
      self.assertEqual(expected_overlap, m.range_overlap(y1, y2, x1, x2))

  def testAdd(self):
    """Addition, including invalid-sentinel propagation."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    self.assertEqual(0, m.add(0, 0))
    self.assertEqual(123, m.add(23, 100))
    self.assertEqual(100, m.add(-100, 200))
    self.assertEqual(self.INVALID_VALUE, m.add(self.INVALID_VALUE, 100))
    self.assertEqual(self.INVALID_VALUE, m.add(100, self.INVALID_VALUE))
    self.assertEqual(self.INVALID_VALUE, m.add(self.INVALID_VALUE,
                                               self.INVALID_VALUE))

  def testDiff(self):
    """diff(): unsigned wrap-around distance in [0..MAX_VALUE]."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    self.assertEqual(0, m.diff(0, 0))
    self.assertEqual(23, m.diff(123, 100))
    self.assertEqual(self.MAX_VALUE + 1 - 23, m.diff(100, 123))
    self.assertEqual(123456, m.diff(self.MAX_VALUE, self.MAX_VALUE - 123456))
    self.assertEqual(self.MAX_VALUE+1-123456,
                     m.diff(self.MAX_VALUE - 123456, self.MAX_VALUE))
    self.assertEqual(9, m.diff(m.wrap_correction(self.MAX_VALUE + 9),
                               self.MAX_VALUE))
    self.assertEqual(self.MAX_VALUE+1-9,
                     m.diff(self.MAX_VALUE,
                            m.wrap_correction(self.MAX_VALUE + 9)))
    self.assertEqual(16234, m.diff(15000, m.wrap_correction(-1234)))
    self.assertEqual(self.HALF_MAX_VALUE,
                     m.diff(self.HALF_MAX_VALUE, 0))
    self.assertEqual(self.MAX_VALUE + 1 - self.HALF_MAX_VALUE,
                     m.diff(0, self.HALF_MAX_VALUE))
    self.assertEqual(self.INVALID_VALUE, m.diff(self.INVALID_VALUE, 100))
    self.assertEqual(self.INVALID_VALUE, m.diff(100, self.INVALID_VALUE))
    self.assertEqual(self.INVALID_VALUE,
                     m.diff(self.INVALID_VALUE, self.INVALID_VALUE))

  def testSub(self):
    """sub(): signed wrap-around distance (may be negative)."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    self.assertEqual(0, m.sub(0, 0))
    self.assertEqual(23, m.sub(123, 100))
    self.assertEqual(-23, m.sub(100, 123))
    self.assertEqual(123456,
                     m.sub(self.MAX_VALUE, self.MAX_VALUE - 123456))
    self.assertEqual(-123456,
                     m.sub(self.MAX_VALUE - 123456, self.MAX_VALUE))
    self.assertEqual(9, m.sub(m.wrap_correction(self.MAX_VALUE + 9),
                              self.MAX_VALUE))
    self.assertEqual(-9, m.sub(self.MAX_VALUE,
                               m.wrap_correction(self.MAX_VALUE + 9)))
    self.assertEqual(16234, m.sub(15000, m.wrap_correction(-1234)))
    self.assertEqual(self.HALF_MAX_VALUE,
                     m.sub(self.HALF_MAX_VALUE, 0))
    self.assertEqual(-self.HALF_MAX_VALUE,
                     m.sub(0, self.HALF_MAX_VALUE))
    self.assertEqual(self.INVALID_VALUE, m.sub(self.INVALID_VALUE, 100))
    self.assertEqual(self.INVALID_VALUE, m.sub(100, self.INVALID_VALUE))
    self.assertEqual(self.INVALID_VALUE,
                     m.sub(self.INVALID_VALUE, self.INVALID_VALUE))

  def testCompareEq(self):
    """cmp() on identical values returns 0."""
    # Compare same value
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    self.assertEqual(0, m.cmp(0, 0))
    self.assertEqual(0, m.cmp(10, 10))
    self.assertEqual(0, m.cmp(3423410, 3423410))
    self.assertEqual(0, m.cmp(898798798, 898798798))
    self.assertEqual(0, m.cmp(self.MAX_VALUE, self.MAX_VALUE))

  def testCompareLarger(self):
    """cmp() when the first value is larger (no wrap-around)."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    # first value larger than second by 1
    self.assertEqual(1, m.cmp(1, 0))
    self.assertEqual(1, m.cmp(10, 9))
    self.assertEqual(1, m.cmp(3423410, 3423409))
    self.assertEqual(1, m.cmp(898798798, 898798797))
    self.assertEqual(1, m.cmp(self.MAX_VALUE, self.MAX_VALUE - 1))
    # first value larger than second by a lot
    self.assertEqual(1, m.cmp(10, 5))
    self.assertEqual(1, m.cmp(3423410, 342340))
    self.assertEqual(1, m.cmp(898798798, 689879877))
    self.assertEqual(1, m.cmp(self.MAX_VALUE, self.MAX_VALUE - 110000))

  def testCompareSmaller(self):
    """cmp() when the first value is smaller (no wrap-around)."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    # first value smaller than second by 1
    self.assertEqual(-1, m.cmp(0, 1))
    self.assertEqual(-1, m.cmp(10, 11))
    self.assertEqual(-1, m.cmp(3423410, 3423411))
    self.assertEqual(-1, m.cmp(898798798, 898798799))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 1, self.MAX_VALUE))
    # first value smaller than second by a lot
    self.assertEqual(-1, m.cmp(0, 1000))
    self.assertEqual(-1, m.cmp(10, 11000))
    self.assertEqual(-1, m.cmp(423410, 3423411))
    self.assertEqual(-1, m.cmp(498798798, 898798799))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 100000, self.MAX_VALUE))

  def testCompareWrapSmaller(self):
    """cmp() when the first value is smaller across the wrap point."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    # first value smaller than second with wrap around
    # Test edge limit on the 1st value
    self.assertEqual(-1, m.cmp(self.MAX_VALUE, 0))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE, 1))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE, 1100))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE, 99999))

    # Test edge limit on the 2nd value
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 1, 0))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 2, 0))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 1000, 0))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 9999, 0))

    # Test close to edge limit on the 2nd value
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 1, 1))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 2, 1))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 1000, 1))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 9999, 1))

    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 1, 134234))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 2, 43213123))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 1000, 212321))
    self.assertEqual(-1, m.cmp(self.MAX_VALUE - 9999, 7842341))

  def testCompareWrapBigger(self):
    """cmp() when the first value is bigger across the wrap point."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    # first value bigger than second with wrap around
    # Test edge limit on the 2nd value
    self.assertEqual(1, m.cmp(0, self.MAX_VALUE))
    self.assertEqual(1, m.cmp(1, self.MAX_VALUE))
    self.assertEqual(1, m.cmp(1100, self.MAX_VALUE))
    self.assertEqual(1, m.cmp(999999, self.MAX_VALUE))

    # Test edge limit on the first value
    self.assertEqual(1, m.cmp(0, self.MAX_VALUE - 1))
    self.assertEqual(1, m.cmp(0, self.MAX_VALUE - 2))
    self.assertEqual(1, m.cmp(0, self.MAX_VALUE - 1000))
    self.assertEqual(1, m.cmp(0, self.MAX_VALUE - 9999))

    # Test close to edge limit on the first value
    self.assertEqual(1, m.cmp(1, self.MAX_VALUE - 1))
    self.assertEqual(1, m.cmp(1, self.MAX_VALUE - 2))
    self.assertEqual(1, m.cmp(1, self.MAX_VALUE - 1000))
    self.assertEqual(1, m.cmp(1, self.MAX_VALUE - 9999))

    self.assertEqual(1, m.cmp(13434, self.MAX_VALUE - 1))
    self.assertEqual(1, m.cmp(134234234, self.MAX_VALUE - 2))
    self.assertEqual(1, m.cmp(342341, self.MAX_VALUE - 1000))
    self.assertEqual(1, m.cmp(743451, self.MAX_VALUE - 9999))

  def testMapIntoSameTimeline(self):
    """Maps offsets around many references back onto the reference line."""
    m = Modulo(self.MAX_VALUE, self.INVALID_VALUE)
    # signed offsets within +/- HALF_MAX_VALUE of the reference
    # (note: `/` here is integer division under python 2 and float
    # division under python 3; both satisfy the assertions below)
    x_list = [
        -self.HALF_MAX_VALUE, -self.HALF_MAX_VALUE + 1,
        -self.HALF_MAX_VALUE + 3, -self.HALF_MAX_VALUE / 2,
        -self.HALF_MAX_VALUE / 4, -self.HALF_MAX_VALUE / 8,
        -256, -100, -10, -3, -1, 0, 1, 3, 10, 100, 256,
        self.HALF_MAX_VALUE / 8, self.HALF_MAX_VALUE / 4,
        self.HALF_MAX_VALUE / 2, self.HALF_MAX_VALUE - 3,
        self.HALF_MAX_VALUE - 1, self.HALF_MAX_VALUE,
    ]
    # reference values spread over the whole range
    ref_list = [
        0, 1, 3, 10, 100, 256,
        self.MAX_VALUE/16-1, self.MAX_VALUE/16, self.MAX_VALUE/16+1,
        self.MAX_VALUE/8-1, self.MAX_VALUE/8, self.MAX_VALUE/8+1,
        self.MAX_VALUE/4-1, self.MAX_VALUE/4, self.MAX_VALUE/4+1,
        self.MAX_VALUE/2-1, self.MAX_VALUE/2, self.MAX_VALUE/2+1,
        self.MAX_VALUE-1, self.MAX_VALUE,
    ]

    for r in ref_list:
      for x in x_list:
        v = m.wrap_correction(r + x)
        self.assertGreaterEqual(v, 0)
        self.assertLessEqual(v, self.MAX_VALUE)
        # mapping the wrapped value back must recover the raw r + x
        mapped_value = m.map_into_same_timeline(v, r)
        self.assertEqual(r + x, mapped_value)


if __name__ == '__main__':
  unittest.main()
class PacketDumper(object):
  """A class used to cherry-pick data from a packet trace (tshark)."""

  def __init__(self, tshark_bin, infile, outfile, analysis_type, debug):
    """Initializes the dumper.

    Args:
      tshark_bin: path to the tshark binary.
      infile: input pcap file name.
      outfile: output file name (or sys.stdout).
      analysis_type: analysis mode string (e.g. 'packet' or 'flow').
      debug: debug verbosity level.
    """
    self._tshark_bin = tshark_bin
    self._infile = infile
    self._outfile = outfile
    self._analysis_type = analysis_type
    self._debug = debug

  def create_command(self):
    """Create the right tshark command.

    Returns:
      the tshark command line (argv list) that dumps the per-packet
      fields parse_line() expects, ';'-separated, one packet per line.
    """
    tshark_opts = ['-n', '-T', 'fields', '-E', 'separator=;']
    # required to get absolute (raw) tcp seq numbers
    tshark_opts += ['-o', 'tcp.relative_sequence_numbers: false']
    tshark_opts += ['-e', 'frame.time_epoch']
    tshark_opts += ['-e', 'ip.proto']
    tshark_opts += ['-e', 'ip.src']
    tshark_opts += ['-e', 'ip.dst']
    tshark_opts += ['-e', 'ip.len']
    tshark_opts += ['-e', 'tcp.srcport']
    tshark_opts += ['-e', 'tcp.dstport']
    tshark_opts += ['-e', 'tcp.seq']
    tshark_opts += ['-e', 'tcp.len']
    tshark_opts += ['-e', 'tcp.nxtseq']
    tshark_opts += ['-e', 'tcp.ack']
    tshark_opts += ['-e', 'tcp.flags.syn']
    tshark_opts += ['-e', 'tcp.options.timestamp.tsval']
    tshark_opts += ['-e', 'tcp.options.timestamp.tsecr']
    command = [self._tshark_bin] + tshark_opts + ['-r', self._infile]
    return command

  def parse_line(self, line):
    """Parses the output of a tshark line.

    Args:
      line: one ';'-separated line of tshark field output.

    Returns:
      a PacketInfo with the parsed (and type-converted) fields.

    Raises:
      ValueError: if the line does not carry the expected fields.
    """
    try:
      (timestamp, ip_proto, ip_src, ip_dst, ip_len,
       sport, dport, tcp_seq, tcp_len, tcp_nxtseq, tcp_ack,
       tcp_flags_syn, tcp_tsval, tcp_tsecr) = line[:-1].split(';')
    except ValueError:
      sys.stderr.write('discarding line = "%s"\n' % line)
      raise
    timestamp = float(timestamp)
    # if there are multiple IP values (e.g. encapsulation), use the last one
    if ',' in ip_proto:
      ip_proto = ip_proto.split(',')[-1]
    ip_proto = int(ip_proto)
    if ',' in ip_src:
      ip_src = ip_src.split(',')[-1]
    if ',' in ip_dst:
      ip_dst = ip_dst.split(',')[-1]
    if ',' in ip_len:
      ip_len = ip_len.split(',')[-1]
    ip_len = int(ip_len)
    # sanitize tcp values
    tcp_seq = int(tcp_seq)
    tcp_len = int(tcp_len)
    tcp_nxtseq = int(tcp_nxtseq) if tcp_nxtseq else None
    tcp_ack = int(tcp_ack) if tcp_ack else None
    tcp_flags_syn = int(tcp_flags_syn)
    # the TCP timestamp option may be absent: keep None in that case.
    # (the original unconditional int() raised ValueError on the empty
    # field, silently discarding the whole packet, even though downstream
    # delta3 processing explicitly handles tcp_tsval/tcp_tsecr == None)
    tcp_tsval = int(tcp_tsval) if tcp_tsval else None
    tcp_tsecr = int(tcp_tsecr) if tcp_tsecr else None
    return PacketInfo(timestamp, ip_proto, ip_src, ip_dst, ip_len,
                      sport, dport, tcp_seq, tcp_len, tcp_nxtseq, tcp_ack,
                      tcp_flags_syn, tcp_tsval, tcp_tsecr)

  def run(self):
    """Runs tshark on the input trace and feeds each packet to TraceInfo."""
    # prepare the output fd
    # we cannot use controlled execution (`with open(...) as f:`) as we want
    # to support sys.stdout too.
    f = (open(self._outfile, 'w+') if self._outfile != sys.stdout else
         sys.stdout)
    try:
      # init trace info object
      trace_info = TraceInfo(f, self._analysis_type, self._debug)
      # run command
      command = self.create_command()
      if self._debug > 0:
        sys.stderr.write(' '.join(command) + '\n')
      proc = subprocess.Popen(command, stdout=subprocess.PIPE)
      # process the output
      for line in iter(proc.stdout.readline, ''):
        try:
          packet = self.parse_line(line)
        except ValueError:
          # unparseable line (e.g. non-TCP packet): skip it
          continue
        trace_info.process_packet(packet)
      # clean up trace object
      del trace_info
    finally:
      if self._outfile != sys.stdout:
        f.close()
class PacketInfo(object):
  """A class containing a summary about a packet.

  A plain value holder for the per-packet fields extracted from the
  tshark output; every constructor argument is stored verbatim as an
  instance attribute of the same name.
  """

  def __init__(self, timestamp, ip_proto, ip_src, ip_dst, ip_len,
               sport, dport, tcp_seq, tcp_len, tcp_nxtseq, tcp_ack,
               tcp_flags_syn, tcp_tsval, tcp_tsecr):
    # store each argument under its own name, in declaration order
    for field_name, field_value in (
        ('timestamp', timestamp),
        ('ip_proto', ip_proto),
        ('ip_src', ip_src),
        ('ip_dst', ip_dst),
        ('ip_len', ip_len),
        ('sport', sport),
        ('dport', dport),
        ('tcp_seq', tcp_seq),
        ('tcp_len', tcp_len),
        ('tcp_nxtseq', tcp_nxtseq),
        ('tcp_ack', tcp_ack),
        ('tcp_flags_syn', tcp_flags_syn),
        ('tcp_tsval', tcp_tsval),
        ('tcp_tsecr', tcp_tsecr)):
      setattr(self, field_name, field_value)
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | """Plotting code.""" 19 | 20 | 21 | from functools import partial 22 | import sys 23 | 24 | from common import decimal_fmt 25 | import matplotlib.gridspec as gridspec 26 | import matplotlib.pyplot as plt 27 | import matplotlib.ticker as ticker 28 | import numpy as np 29 | import pandas as pd 30 | 31 | 32 | MAX_SEPARATE = 5 33 | CUT_VALUE_HEAD_SECS = 0.01 34 | CUT_VALUE_TAIL_SECS = 0.1 35 | NUM_BINS = 50 36 | NUM_LARGEST_BINS = 10 37 | MIN_SEPARATION = 0.05 38 | MIN_FLOW_GOODPUT = 5000000 39 | 40 | NUM_MEAN_MARKERS = 10 41 | 42 | # per-experiment, per-direction color/marker 43 | DIR_CONN_COLOR_D = { 44 | 'delta1': { 45 | 'fwd': ['darkblue', 'x'], 46 | 'rev': ['darkgreen', 'x'], 47 | }, 48 | 'delta2': { 49 | 'fwd': ['darkblue', 'x'], 50 | 'rev': ['darkgreen', 'x'], 51 | }, 52 | 'delta3': { 53 | 'fwd': ['blue', 'x'], 54 | 'rev': ['green', 'x'], 55 | }, 56 | 'delta4': { 57 | 'fwd': ['blue', 'x'], 58 | 'rev': ['green', 'x'], 59 | }, 60 | } 61 | 62 | # per-connection color/marker 63 | IP_CONN_COLOR_D = { 64 | 0: ['g', 'o'], 65 | 1: ['r', 'v'], 66 | 2: ['c', '^'], 67 | 3: ['m', '>'], 68 | 4: ['y', '<'], 69 | 5: ['k', 'o'], 70 | 6: ['g', 'v'], 71 | 7: ['r', '^'], 72 | 8: ['c', '>'], 73 | 9: ['m', '<'], 74 | 'remaining': ['b', 'x'], 75 | } 76 | 77 | 78 | class Plotter(object): 79 | """Class that processes analyzed files and plots them.""" 80 | 81 | def __init__(self, infile, outfile, analysis_type, plot_format, 82 | plot_title, src_reverse, debug): 83 | self._infile = infile 84 | self._outfile = outfile 85 | self._analysis_type = analysis_type 86 | self._plot_format = plot_format 87 | self._plot_title = plot_title 88 | self._src_reverse = src_reverse 89 | self._debug = debug 90 | milli = 1e-3 91 | self._format_milli = ticker.FuncFormatter( 92 | lambda y, pos: '{0:g}'.format(y / milli)) 93 | kilo = 1e+3 94 | self._format_kilo = ticker.FuncFormatter( 
95 | lambda y, pos: '{0:g}'.format(y / kilo)) 96 | mega = 1e+6 97 | self._format_mega = ticker.FuncFormatter( 98 | lambda y, pos: '{0:g}'.format(y / mega)) 99 | cent = 100 100 | self._format_percent = ticker.FuncFormatter( 101 | lambda y, pos: '{0:g}'.format(y * cent)) 102 | 103 | def run(self): 104 | """Plot a result file obtained from the pcap analysis.""" 105 | df = self.read_input() 106 | if self._analysis_type == 'flow': 107 | self.flow_process_data(df) 108 | elif self._analysis_type == 'packet': 109 | self.packet_process_data(df) 110 | 111 | def read_input(self): 112 | """Read an input file into a pandas dataframe.""" 113 | # prepare the input fd 114 | # we cannot use controlled execution (`with open(...) as f:`) as we want 115 | # to support sys.stdin too. 116 | f = (open(self._infile, 'r') if self._infile != sys.stdin else sys.stdin) 117 | try: 118 | if self._analysis_type == 'flow': 119 | df = self.flow_read_input(f) 120 | elif self._analysis_type == 'packet': 121 | df = self.packet_read_input(f) 122 | finally: 123 | if self._infile != sys.stdin: 124 | f.close() 125 | return df 126 | 127 | def flow_read_input(self, f): 128 | """Read input file into a pandas dataframe (flow type).""" 129 | lst = [] 130 | i = 0 131 | for line in f: 132 | try: 133 | (connhash, first_ts, last_ts, 134 | ip_proto, 135 | tcp_seq_syn_sport, tcp_seq_syn_dport, 136 | ip_total_pkt, ip_total_bytes, 137 | pps, ip_bitrate, tcp_bytes, 138 | tcp_goodput_bytes, tcp_goodput_bitrate, 139 | delta1_small_mean, 140 | delta1_small_median, 141 | delta1_large_mean, 142 | delta1_large_median) = line.split() 143 | except ValueError: 144 | sys.stderr.write('discarding line = "%s"\n' % line) 145 | continue 146 | if line[0] == '#': 147 | # this is a comment 148 | continue 149 | if self._debug > 0: 150 | sys.stderr.write('%s\n' % line) 151 | if pps == '-' or ip_bitrate == '-': 152 | continue 153 | lst += [[i, connhash, float(first_ts), float(last_ts), 154 | int(ip_proto), 155 | tcp_seq_syn_sport, 
tcp_seq_syn_dport, 156 | int(ip_total_pkt), int(ip_total_bytes), 157 | float(pps), float(ip_bitrate), int(tcp_bytes), 158 | int(tcp_goodput_bytes), float(tcp_goodput_bitrate), 159 | float(delta1_small_mean), float(delta1_small_median), 160 | float(delta1_large_mean), float(delta1_large_median)]] 161 | i += 1 162 | df = pd.DataFrame(lst, columns=['order', 'connhash', 'first_ts', 'last_ts', 163 | 'ip_proto', 164 | 'tcp_seq_syn_sport', 'tcp_seq_syn_dport', 165 | 'ip_total_pkt', 'ip_total_bytes', 166 | 'pps', 'ip_bitrate', 'tcp_bytes', 167 | 'tcp_goodput_bytes', 'tcp_goodput_bitrate', 168 | 'delta1_small_mean', 'delta1_small_median', 169 | 'delta1_large_mean', 'delta1_large_median']) 170 | return df 171 | 172 | def packet_read_input(self, f): 173 | """Read input file into a pandas dataframe (packet type).""" 174 | lst = [] 175 | i = 0 176 | for line in f: 177 | try: 178 | t, timestamp, src, dst, delta, traffic = line.split() 179 | except ValueError: 180 | sys.stderr.write('discarding line = "%s"\n' % line) 181 | continue 182 | if line[0] == '#': 183 | # this is a comment 184 | continue 185 | if self._debug > 0: 186 | sys.stderr.write('%s\n' % line) 187 | lst += [[i, t, float(timestamp), src, dst, float(delta), traffic]] 188 | i += 1 189 | df = pd.DataFrame(lst, columns=['order', 'type', 'timestamp', 'src', 'dst', 190 | 'delta', 'traffic']) 191 | return df 192 | 193 | def flow_process_data(self, df): 194 | """Process a pandas dataframe (flow mode).""" 195 | # create the matplotlib figure 196 | fig = plt.figure(figsize=(9, 7)) 197 | # ax_pps = fig.add_subplot(5, 1, 1) 198 | ax_tcp_rate = fig.add_subplot(4, 1, 1) 199 | ax_delta1 = fig.add_subplot(4, 1, 2) 200 | ax_tcp_total = fig.add_subplot(4, 1, 3) 201 | ax_tcp_extra_bytes = fig.add_subplot(4, 1, 4) 202 | # ax_ip_rate = fig.add_subplot(4, 1, 4) 203 | 204 | # shift x axis 205 | time_shift = float(df[:1].first_ts) 206 | format_shift = ticker.FuncFormatter( 207 | lambda x, pos: '{0:g}'.format(x - time_shift)) 208 | for ax 
in (ax_tcp_rate, ax_delta1, ax_tcp_total, ax_tcp_extra_bytes): 209 | ax.xaxis.set_major_formatter(format_shift) 210 | xticks = ax.get_xticks() + (time_shift - int(time_shift)) 211 | ax.set_xticks(xticks) 212 | xmin = float(df.first_ts[:1]) 213 | xmax = float(df.first_ts[-1:]) 214 | if xmin < xmax: 215 | extra_space = (xmax - xmin) * .05 / 2 216 | ax.set_xlim(xmin - extra_space, xmax + extra_space) 217 | 218 | # scale y axis 219 | # ax_pps.yaxis.set_major_formatter(self._format_kilo) 220 | ax_tcp_rate.yaxis.set_major_formatter(self._format_mega) 221 | ax_tcp_total.yaxis.set_major_formatter(self._format_mega) 222 | ax_tcp_extra_bytes.yaxis.set_major_formatter(self._format_percent) 223 | ax_delta1.yaxis.set_major_formatter(self._format_milli) 224 | # ax_ip_rate.yaxis.set_major_formatter(self._format_mega) 225 | 226 | # ax_delta1.plot(df.first_ts, df.delta1_large_mean, 227 | # linestyle='', marker='v', 228 | # color='g', markersize=3) 229 | 230 | # select tcp flows only 231 | df_tcp = df[(df.ip_proto == 6)] 232 | label, color, marker = 'tcp', 'b', 'x' 233 | # ax_pps.plot(df_tcp.first_ts, df_tcp.ip_total_pkt, 234 | # label=label, linestyle='', marker=marker, 235 | # color=color, markersize=3) 236 | 237 | # plot TCP flow goodput 238 | ax_tcp_rate.plot(df_tcp.first_ts, df_tcp.tcp_goodput_bitrate, 239 | label=label, linestyle='', marker=marker, 240 | color=color, markersize=3) 241 | tcp_goodput_quantile_01 = df_tcp.tcp_goodput_bitrate.quantile(q=0.01) 242 | ax_tcp_rate.axhline(y=tcp_goodput_quantile_01, color='g', 243 | ls='dotted', lw=0.5) 244 | tcp_goodput_quantile_50 = df_tcp.tcp_goodput_bitrate.quantile(q=0.50) 245 | ax_tcp_rate.axhline(y=tcp_goodput_quantile_50, color='g', 246 | ls='dashed', lw=0.5) 247 | tcp_goodput_quantile_99 = df_tcp.tcp_goodput_bitrate.quantile(q=0.99) 248 | ax_tcp_rate.axhline(y=tcp_goodput_quantile_99, color='g', 249 | ls='dotted', lw=0.5) 250 | # zoom on around the median 251 | ax_tcp_rate.set_ylim([0, 10 * tcp_goodput_quantile_50]) 252 | # 
add a label with the median 253 | ax_tcp_rate.text(time_shift, tcp_goodput_quantile_50, 254 | decimal_fmt(tcp_goodput_quantile_50, 'bps'), 255 | fontsize='x-small') 256 | 257 | # plot flow media delta1 258 | ax_delta1.plot(df_tcp.first_ts, df_tcp.delta1_large_median, 259 | linestyle='', marker='x', 260 | color='b', markersize=3) 261 | delta1_quantile_01 = df_tcp.delta1_large_median.quantile(q=0.01) 262 | ax_delta1.axhline(y=delta1_quantile_01, color='g', ls='dotted', lw=0.5) 263 | delta1_quantile_50 = df_tcp.delta1_large_median.quantile(q=0.50) 264 | ax_delta1.axhline(y=delta1_quantile_50, color='g', ls='dashed', lw=0.5) 265 | delta1_quantile_99 = df_tcp.delta1_large_median.quantile(q=0.99) 266 | ax_delta1.axhline(y=delta1_quantile_99, color='g', ls='dotted', lw=0.5) 267 | # zoom on around the median 268 | ax_delta1.set_ylim([0, 10 * delta1_quantile_50]) 269 | # add a label with the median 270 | ax_delta1.text(time_shift, delta1_quantile_50, 271 | '%s' % decimal_fmt(delta1_quantile_50, 'sec'), 272 | fontsize='x-small') 273 | 274 | # plot flow goodput (absolute) 275 | ax_tcp_total.plot(df_tcp.first_ts, df_tcp.tcp_goodput_bytes, 276 | label=label, linestyle='', marker=marker, 277 | color=color, markersize=3) 278 | tcp_bytes_quantile_50 = df_tcp.tcp_goodput_bytes.quantile(q=0.50) 279 | ax_tcp_total.axhline(y=tcp_bytes_quantile_50, color='g', 280 | ls='dashed', lw=0.5) 281 | tcp_extra_percent = ((df_tcp.tcp_bytes - df_tcp.tcp_goodput_bytes) / 282 | df_tcp.tcp_goodput_bytes) 283 | ax_tcp_extra_bytes.plot(df_tcp.first_ts, tcp_extra_percent, 284 | label=label, linestyle='', marker=marker, 285 | color=color, markersize=3) 286 | ax_tcp_extra_bytes.axhline(y=0, color='k', ls='solid', lw=0.5) 287 | ax_tcp_extra_bytes.axhline(y=tcp_extra_percent.mean(), color='g', 288 | ls='dashed', lw=0.5) 289 | 290 | # ax_ip_rate.plot(df_tcp.first_ts, df_tcp.ip_bitrate, 291 | # label=label, linestyle='', marker=marker, 292 | # color=color, markersize=3) 293 | total_line = 'total { flows: %s 
pkt: %s ip_bytes: %s }' % ( 294 | decimal_fmt(len(df_tcp), ''), 295 | decimal_fmt(sum(df_tcp['ip_total_pkt']), 'pkt'), 296 | decimal_fmt(sum(df_tcp['ip_total_bytes']), 'B')) 297 | tcp_flows_over_threshold = len( 298 | df_tcp[(df_tcp.tcp_goodput_bitrate > MIN_FLOW_GOODPUT)]) 299 | total_line += '\ntcp_goodput { median: %s percent_over_%s: %f } ' % ( 300 | decimal_fmt(tcp_goodput_quantile_50, 'bps'), 301 | decimal_fmt(MIN_FLOW_GOODPUT, 'bps'), 302 | 100.0 * tcp_flows_over_threshold / len(df_tcp)) 303 | total_line += '\ndelta1 { median: %s } ' % ( 304 | decimal_fmt(delta1_quantile_50, 'sec')) 305 | 306 | ax_tcp_extra_bytes.set_xlabel('Flow Start (sec) -- ' + total_line, 307 | fontsize='small') 308 | # ax_pps.set_ylabel('Flow Throughput (Kpps)') 309 | ax_tcp_rate.set_ylabel('Flow Goodput\n(Mbps)') 310 | ax_delta1.set_ylabel('Flow Median\ndelta1 (msec)') 311 | ax_tcp_total.set_ylabel('Flow Goodput\n(MB)') 312 | ax_tcp_extra_bytes.set_ylabel('Flow Extra\nTCP Bytes (%)') 313 | # ax_ip_rate.set_ylabel('Flow IP Throughput (Mbps)') 314 | # ax_tcp_total.legend() 315 | ax_tcp_rate.set_title(self._plot_title) 316 | plt.savefig(self._outfile, format=self._plot_format) 317 | 318 | def packet_process_data(self, df): 319 | """Process a pandas dataframe (packet mode).""" 320 | # create the matplotlib figure 321 | fig = plt.figure(figsize=(9, 7)) 322 | fig.subplots_adjust(hspace=.4) 323 | fig.canvas.set_window_title('packet_process_data') 324 | 325 | outer_grid = gridspec.GridSpec(2, 2) 326 | layout = [ 327 | ((0, 0), 'delta1', 'time', '-'), 328 | ((0, 1), 'delta2', 'time', '-'), 329 | # ((1, 0), 'delta3', 'time', '-'), 330 | ((1, 0), 'delta4', 'distro', 'data'), 331 | ((1, 1), 'delta4', 'distro', 'ack'), 332 | ] 333 | 334 | # split the data depending on the direction 335 | def match_direction(reverse, x): 336 | addr = x['src'] 337 | if ':' in addr: 338 | addr, _ = addr.split(':', 1) 339 | if not reverse or not addr.startswith(reverse): 340 | return 'fwd' 341 | else: 342 | return 
'rev' 343 | bound_match_direction = partial(match_direction, self._src_reverse) 344 | df['dir'] = df.apply(bound_match_direction, axis=1) 345 | 346 | ax = {} 347 | subplot_spec = {} 348 | for (position, delta, graph, traffic) in layout: 349 | ax[delta] = {} 350 | subplot_spec[delta] = {} 351 | # get the data frame to analyze here 352 | data = {} 353 | for direction in ('fwd', 'rev'): 354 | data[direction] = df[(df.dir == direction) & (df.type == delta)] 355 | if delta == 'delta4': 356 | # remove the heads of the trains (hystart_ack_delta in tcp_cubic.c) 357 | data[direction] = data[direction][(data[direction].delta < 0.002)] 358 | # print the data frames 359 | # ax[delta][graph] = fig.add_subplot(4, 2, position) 360 | subplot_spec[delta][graph] = outer_grid[position[0], position[1]] 361 | ax[delta][graph] = plt.subplot(subplot_spec[delta][graph]) 362 | 363 | if graph == 'time': 364 | # print the time series 365 | ax[delta][graph] = self.add_timeseries_graph( 366 | delta, ax[delta][graph], subplot_spec[delta][graph], data) 367 | elif graph == 'distro': 368 | # print the distribution 369 | self.add_distribution_graph(delta, ax[delta][graph], data, traffic) 370 | 371 | # main title 372 | plt.suptitle(self._plot_title, fontsize='x-small') 373 | 374 | # synchronize the y axes for delta1 and delta2 375 | ymin_l = [] 376 | ymax_l = [] 377 | for delta in ('delta1', 'delta2'): 378 | for vax in ax[delta]['time']: 379 | ymin, ymax = vax.get_ylim() 380 | ymin_l.append(ymin) 381 | ymax_l.append(ymax) 382 | ymin = min(ymin_l) 383 | ymax = max(ymax_l) 384 | for delta in ('delta1', 'delta2'): 385 | for vax in ax[delta]['time']: 386 | vax.set_ylim(ymin, ymax) 387 | # add the legend 388 | ax['delta1']['time'][1].legend(prop={'size': 'xx-small'}) 389 | 390 | plt.savefig(self._outfile, format=self._plot_format) 391 | 392 | def add_timeseries_graph(self, delta, _, subplot_spec, data): 393 | """Print the time series.""" 394 | total_line = '%s' % delta 395 | time_shift = {} 396 | 397 | # 
ensure there is at least some non-empty dataframes 398 | # pylint: disable=g-explicit-length-test 399 | if all([len(df) == 0 for df in data.values()]): 400 | print 'error: no actual data for %s' % delta 401 | return 402 | # pylint: enable=g-explicit-length-test 403 | 404 | # split the plot in 2 uneven parts 405 | inner_grid = gridspec.GridSpecFromSubplotSpec(1, 5, subplot_spec) 406 | axl = plt.subplot(inner_grid[0, 0]) 407 | axl.xaxis.set_ticks_position('none') 408 | axl.yaxis.set_ticks_position('left') 409 | axr = plt.subplot(inner_grid[0, 1:]) 410 | axr.yaxis.set_ticks_position('right') 411 | axr.tick_params(labeltop='off', labelright='off') 412 | 413 | for i in range(len(data)): 414 | df_local = data.values()[i] 415 | if len(df_local) == 0: # pylint: disable=g-explicit-length-test 416 | continue 417 | direction = data.keys()[i] 418 | color, marker = DIR_CONN_COLOR_D[delta][direction] 419 | 420 | # get the time series label 421 | label = '%s "src %s %s"' % (direction, '==' if direction == 'rev' 422 | else '!=', self._src_reverse) 423 | # get x-axis shift 424 | time_shift[direction] = float(df_local[:1].timestamp) 425 | df_all = df_local 426 | 427 | # for (src, dst, color, marker) in separate_conn_l: 428 | # df_conn = df_all[(df_all.src == src) & (df_all.dst == dst)] 429 | # label = '%s->%s' % (df_conn.src.iloc[0], df_conn.dst.iloc[0]) 430 | # ax[delta]['time'].plot(df_conn.timestamp, df_conn.delta, 431 | # label=label, linestyle='-', marker=marker, 432 | # color=color, markersize=3) 433 | # # remove the connection 434 | # df_all = df_all[(df_all.src != src) | (df_all.dst != dst)] 435 | 436 | # print the time series 437 | axr.plot(df_all.timestamp, df_all.delta, 438 | linestyle='', marker=marker, 439 | label=label, 440 | color=color, markersize=3) 441 | 442 | # calculate and plot the per-second averages 443 | x_l = [] 444 | y_l = [] 445 | for d in np.array_split(df_all, NUM_MEAN_MARKERS): 446 | if len(d) == 0: # pylint: disable=g-explicit-length-test 447 | 
continue 448 | x_l.append(d.timestamp.mean()) 449 | y_l.append(d.delta.mean()) 450 | axr.plot(x_l, y_l, color='w', marker='*', markeredgecolor=color) 451 | 452 | # calculate the delta percentiles 453 | delta_quantile_50 = df_local.delta.quantile(q=0.50) 454 | delta_mean = df_local.delta.mean() 455 | delta_stddev = df_local.delta.std() 456 | 457 | # print delta percentile lines 458 | axr.axhline(y=delta_quantile_50, color=color, ls='dashed', lw=0.5) 459 | axr.axhline(y=delta_mean, color=color, ls='dotted', lw=0.5) 460 | total_line += ' %s { avg: %s median: %s stddev: %s }\n' % ( 461 | direction, 462 | decimal_fmt(delta_mean, 'sec'), 463 | decimal_fmt(delta_quantile_50, 'sec'), 464 | decimal_fmt(delta_stddev, 'sec')) 465 | 466 | # print a boxplot 467 | bp_data = [df['delta'] for df in data.values()] 468 | bp = axl.boxplot(bp_data, sym='k+', 469 | notch=True, 470 | bootstrap=5000, 471 | patch_artist=True) 472 | 473 | # change the names of the distro ticks 474 | plt.setp(axl, xticklabels=data.keys()) 475 | 476 | # mark the medians in white 477 | plt.setp(bp['medians'], color='white') 478 | # add a mark for the average (mean) 479 | for i in range(len(data.values())): 480 | df_local = data.values()[i] 481 | if len(df_local) == 0: # pylint: disable=g-explicit-length-test 482 | continue 483 | med = bp['medians'][i] 484 | delta_mean = df_local.delta.mean() 485 | axl.plot([np.average(med.get_xdata())], [delta_mean], 486 | color='w', marker='*', markeredgecolor='k') 487 | 488 | i = 0 489 | for obj in bp['boxes']: 490 | direction = data.keys()[i] 491 | color, marker = DIR_CONN_COLOR_D[delta][direction] 492 | plt.setp(obj, color=color) 493 | i += 1 494 | i = 0 495 | for obj in bp['whiskers'] + bp['caps']: 496 | direction = data.keys()[(i / 2) % 2] 497 | color, marker = DIR_CONN_COLOR_D[delta][direction] 498 | plt.setp(obj, color=color, ls='solid', lw=0.5) 499 | i += 1 500 | 501 | # shift x axis 502 | min_time_shift = min(time_shift.values()) 503 | format_shift = 
ticker.FuncFormatter( 504 | lambda x, pos: '{0:g}'.format(x - min_time_shift)) 505 | axr.xaxis.set_major_formatter(format_shift) 506 | xticks = axr.get_xticks() + (min_time_shift - int(min_time_shift)) 507 | axr.set_xticks(xticks) 508 | # pylint: disable=g-explicit-length-test 509 | xmin = min([(float(d.timestamp[:1]) if len(d) > 0 else float('nan')) 510 | for d in data.values()]) 511 | xmax = max([(float(d.timestamp[-1:]) if len(d) > 0 else float('nan')) 512 | for d in data.values()]) 513 | # pylint: enable=g-explicit-length-test 514 | extra_space = (xmax - xmin) * .05 / 2 515 | axr.set_xlim(xmin - extra_space, xmax + extra_space) 516 | 517 | # plot labels 518 | axr.set_xlabel('trace timestamp (sec) -- ' + total_line, 519 | fontsize='xx-small') 520 | axl.set_ylabel('%s value (sec)' % delta, fontsize='x-small') 521 | axl.tick_params(axis='both', which='major', labelsize=10) 522 | axl.tick_params(axis='both', which='minor', labelsize=8) 523 | axr.tick_params(axis='both', which='major', labelsize=10) 524 | axr.tick_params(axis='both', which='minor', labelsize=8) 525 | return (axl, axr) 526 | 527 | def add_distribution_graph(self, delta, ax, data, traffic): 528 | """Print the time series.""" 529 | for i in range(len(data)): 530 | df_local = data.values()[i] 531 | if traffic != '-': 532 | df_local = df_local[df_local.traffic == traffic] 533 | # pylint: disable=g-explicit-length-test 534 | if len(df_local.delta.values) == 0: 535 | continue 536 | # pylint: enable=g-explicit-length-test 537 | direction = data.keys()[i] 538 | color, _ = DIR_CONN_COLOR_D[delta][direction] 539 | 540 | # # 1. print the head distro 541 | # df_head = df_local[df_local.delta < CUT_VALUE_HEAD_SECS] 542 | # if len(df_head) >= 1: 543 | # n, bins, _ = ax[delta]['head'].hist(df_head.delta.values, 544 | # NUM_BINS, histtype='step', 545 | # cumulative=False) 546 | # # set the xlim before adding the percentiles 547 | # ax[delta]['head'].set_xlim(0, bins[-1]) 548 | 549 | # 2. 
print the tail distro 550 | _, _, _ = ax.hist(df_local.delta.values, NUM_BINS, 551 | histtype='step', cumulative=False, 552 | color=color) 553 | 554 | # 3. calculate the delta percentiles 555 | delta_quantile_50 = df_local.delta.quantile(q=0.50) 556 | delta_quantile_99 = df_local.delta.quantile(q=0.99) 557 | 558 | # print the delta percentile lines 559 | ax.axvline(x=delta_quantile_50, color=color, ls='dashed', lw=0.5) 560 | ax.axvline(x=delta_quantile_99, color=color, ls='dotted', lw=0.5) 561 | 562 | # plot labels 563 | # ax[delta]['head'].set_xlabel('%s value (sec)' % delta, 564 | # fontsize='xx-small') 565 | # ax[delta]['head'].set_ylabel('Head PDF (absolute)', fontsize='x-small') 566 | ax.set_xlabel('%s %s value (sec)' % (traffic, delta), fontsize='xx-small') 567 | ax.set_ylabel('Tail PDF (absolute)', fontsize='x-small') 568 | ax.tick_params(axis='both', which='major', labelsize=10) 569 | ax.tick_params(axis='both', which='minor', labelsize=8) 570 | 571 | # # capture up to the n largest bins in the tail 572 | # if len(np.where(bins > CUT_VALUE_TAIL_SECS)[0]) >= 1: 573 | # # trace has packets with delta in the tail 574 | # index = np.where(bins > CUT_VALUE_TAIL_SECS)[0][0] 575 | # argarray = n[index:].argsort()[-NUM_LARGEST_BINS:] 576 | # ymax_remaining = n[index + argarray[-1]] 577 | # ax[delta]['tail'].set_ylim(0, ymax_remaining * 1.5) 578 | # # add lines for the largest bins 579 | # xlist = [] 580 | # for i in argarray[::-1]: 581 | # yval = n[index + i] 582 | # xval = bins[index + i] 583 | # # discard values too close to previous values 584 | # if any([abs(x - xval) < MIN_SEPARATION for x in xlist]): 585 | # continue 586 | # xlist += [xval] 587 | # ax[delta]['tail'].axvline(x=xval, color='r', ls='dotted') 588 | # ax[delta]['tail'].text(xval, yval, 589 | # 'delta: %s' % decimal_fmt(xval, 'sec'), 590 | # fontsize='x-small') 591 | # ax[delta]['time'].axhline(y=xval, color='r', ls='dotted') 592 | 593 | # # print the min_delta(pkt_len) distribution 594 | # 
tcp_len_list = df_local.tcp_len.unique() 595 | # tcp_len_list.sort() 596 | # delta_list = [] 597 | # for tcp_len in tcp_len_list: 598 | # delta_list.append(min(df_local[df_local.tcp_len == tcp_len].delta)) 599 | # ax[delta]['length'].loglog(tcp_len_list, delta_list, 600 | # linestyle='', marker='x', 601 | # color=color, markersize=3) 602 | # ax[delta]['length'].axhline(y=delta_min, color=color, ls='dotted') 603 | 604 | # # plot labels 605 | # ax[delta]['length'].set_xlabel('Packet size (bytes)', 606 | # fontsize='x-small') 607 | # ax[delta]['length'].set_ylabel('min %s time (secs)' % delta, 608 | # fontsize='x-small') 609 | 610 | def get_most_popular_connections(self, df, delta, max_conn): 611 | """Select the `max_conn` most popular connections based on `delta`.""" 612 | separate_conn_l = [] 613 | i = 0 614 | df_all = df 615 | for i in range(max_conn): 616 | if len(df_all) < 1: 617 | break 618 | order = df_all[delta].argmax() 619 | src = df_all.src[order] 620 | dst = df_all.dst[order] 621 | try: 622 | color, marker = IP_CONN_COLOR_D[i] 623 | except KeyError: 624 | color, marker = IP_CONN_COLOR_D['remaining'] 625 | separate_conn_l += [[src, dst, color, marker]] 626 | # remove the connection 627 | df_all = df_all[(df_all.src != src) | (df_all.dst != dst)] 628 | i += 1 629 | return separate_conn_l 630 | -------------------------------------------------------------------------------- /rttcp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright 2017 Google Inc. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | """Simple tcp flow aggregation.""" 19 | 20 | 21 | import argparse 22 | import os.path 23 | import sys 24 | 25 | from common import __version__ 26 | from packet_dumper import PacketDumper 27 | from plotter import Plotter 28 | 29 | 30 | def get_options(argv): 31 | """Generic option parser. 32 | 33 | Args: 34 | argv: list containing arguments 35 | 36 | Returns: 37 | argparse.ArgumentParser - generated option object 38 | """ 39 | # init parser 40 | parser = argparse.ArgumentParser(description='rttcp flow aggregator.') 41 | subparsers = parser.add_subparsers() 42 | # independent sub-commands 43 | parser_help = subparsers.add_parser('help', help='show help screen') 44 | parser_help.set_defaults(subcommand='help') 45 | parser_anal = subparsers.add_parser('analyze', help='analyze pcap file') 46 | parser_anal.set_defaults(subcommand='analyze') 47 | parser_plot = subparsers.add_parser('plot', help='plot analysis file') 48 | parser_plot.set_defaults(subcommand='plot') 49 | # common arguments 50 | for p in (parser, parser_anal, parser_plot): 51 | p.add_argument('-d', '--debug', action='count', 52 | dest='debug', default=0, 53 | help='Increase verbosity (use multiple times for more)',) 54 | p.add_argument('--quiet', action='store_const', 55 | dest='debug', const=-1, 56 | help='Zero verbosity',) 57 | p.add_argument('-v', '--version', action='version', 58 | version=__version__) 59 | p.add_argument('--tshark', dest='tshark', 60 | default='tshark', 61 | metavar='TSHARK', 62 | help='tshark binary',) 63 | p.add_argument('-i', 
'--input', dest='infile', default=None, 64 | metavar='INPUT-FILE', 65 | help='input file',) 66 | p.add_argument('-o', '--output', dest='outfile', default=None, 67 | metavar='OUTPUT-FILE', 68 | help='output file',) 69 | p.add_argument('--type', action='store', 70 | dest='analysis_type', default='flow', 71 | metavar='ANALYSIS_TYPE', 72 | help='set the analysis type (flow, packet)') 73 | p.add_argument('--src-reverse', dest='src_reverse', default=None, 74 | metavar='SRC-REVERSE', 75 | help='any packet from a src definition (cidr) as reverse',) 76 | # plot-only arguments 77 | parser_plot.add_argument('--title', action='store', 78 | dest='plot_title', default='', 79 | metavar='PLOT_TITLE', 80 | help='set the plot title') 81 | parser_plot.add_argument('--format', action='store', 82 | dest='plot_format', default='pdf', 83 | metavar='PLOT_FORMAT', 84 | help='set the plot format') 85 | # do the parsing 86 | options = parser.parse_args(argv[1:]) 87 | if options.subcommand == 'help': 88 | parser.print_help() 89 | sys.exit(0) 90 | return options 91 | 92 | 93 | def main(argv): 94 | # parse options 95 | options = get_options(argv) 96 | # get infile/outfile 97 | if options.infile in (None, '-'): 98 | options.infile = sys.stdin 99 | else: 100 | # ensure file exists 101 | assert os.path.isfile(options.infile), ( 102 | 'File %s does not exist' % options.infile) 103 | if options.outfile in (None, '-'): 104 | options.outfile = sys.stdout 105 | # print results 106 | if options.debug > 0: 107 | sys.stderr.write('%s\n' % options) 108 | # do something 109 | if options.subcommand == 'analyze': 110 | packet_dumper = PacketDumper(options.tshark, 111 | options.infile, 112 | options.outfile, 113 | options.analysis_type, 114 | options.debug) 115 | packet_dumper.run() 116 | 117 | elif options.subcommand == 'plot': 118 | plotter = Plotter(options.infile, 119 | options.outfile, 120 | options.analysis_type, 121 | options.plot_format, 122 | options.plot_title, 123 | options.src_reverse, 124 | 
options.debug) 125 | plotter.run() 126 | 127 | 128 | if __name__ == '__main__': 129 | main(sys.argv) 130 | -------------------------------------------------------------------------------- /trace_info.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright 2017 Google Inc. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | """Class containing info about a full trace.""" 19 | 20 | 21 | import collections 22 | import sys 23 | 24 | from common import endpoint_cmp 25 | from connection_info import ConnectionInfo 26 | 27 | 28 | class TraceInfo(object): 29 | """A class containing a summary about a full packet trace.""" 30 | 31 | ANALYSIS_TYPES = ['flow', 'packet'] 32 | 33 | def __init__(self, f, analysis_type, debug=0): 34 | self._f = f 35 | assert analysis_type in self.ANALYSIS_TYPES 36 | self._analysis_type = analysis_type 37 | self._debug = debug 38 | self._conn = collections.OrderedDict() 39 | self._f.write(ConnectionInfo.header(self._analysis_type) + '\n') 40 | 41 | def __del__(self): 42 | # print connection data to out file 43 | for connhash in self._conn.keys(): 44 | self._conn[connhash].print_connection_info() 45 | 46 | @classmethod 47 | def get_hash(cls, packet): 48 | return (('%s:%s-%s:%s-%s' % (packet.ip_src, packet.sport, packet.ip_dst, 49 | packet.dport, packet.ip_proto)) 50 | if (endpoint_cmp(packet.ip_src, 
packet.sport, packet.ip_dst, 51 | packet.dport) <= 0) else 52 | ('%s:%s-%s:%s-%s' % (packet.ip_dst, packet.dport, packet.ip_src, 53 | packet.sport, packet.ip_proto))) 54 | 55 | def process_packet(self, packet): 56 | """Process a packet.""" 57 | # get a 4-tuple hash 58 | connhash = self.get_hash(packet) 59 | if self._debug > 0: 60 | sys.stderr.write('%s %s %s %s %s %s %s\n' % ( 61 | connhash, packet.ip_src, packet.ip_dst, packet.sport, packet.dport, 62 | packet.timestamp, packet.ip_len)) 63 | # only process tcp, udp, and sctp packets 64 | if (packet.ip_proto != 6 and packet.ip_proto != 17 and 65 | packet.ip_proto != 132): 66 | return 67 | # process the packet 68 | if connhash not in self._conn: 69 | self._conn[connhash] = ConnectionInfo(self._analysis_type, 70 | connhash, self._f, self._debug) 71 | self._conn[connhash].process_packet(packet) 72 | --------------------------------------------------------------------------------