├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── test.yml ├── .gitignore ├── LICENSE.txt ├── README.md ├── requirements.txt ├── src ├── MANIFEST.in ├── README.txt ├── pyshark │ ├── __init__.py │ ├── cache.py │ ├── capture │ │ ├── __init__.py │ │ ├── capture.py │ │ ├── file_capture.py │ │ ├── inmem_capture.py │ │ ├── live_capture.py │ │ ├── live_ring_capture.py │ │ ├── pipe_capture.py │ │ └── remote_capture.py │ ├── config.ini │ ├── config.py │ ├── ek_field_mapping.py │ ├── packet │ │ ├── __init__.py │ │ ├── common.py │ │ ├── consts.py │ │ ├── fields.py │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── ek_layer.py │ │ │ ├── json_layer.py │ │ │ └── xml_layer.py │ │ ├── packet.py │ │ └── packet_summary.py │ └── tshark │ │ ├── __init__.py │ │ ├── output_parser │ │ ├── __init__.py │ │ ├── base_parser.py │ │ ├── tshark_ek.py │ │ ├── tshark_json.py │ │ └── tshark_xml.py │ │ └── tshark.py ├── setup.py └── tox.ini └── tests ├── capture ├── test_capture.py ├── test_inmem_capture.py └── test_live_capture.py ├── conftest.py ├── data ├── capture_test.pcapng ├── ek_field_mapping.json ├── packet.json ├── packet.xml └── packet_ek.json ├── packet └── test_fields.py ├── test_basic_parsing.py ├── test_cap_operations.py ├── test_ek_field_mapping.py ├── test_packet_operations.py └── tshark ├── test_tshark.py ├── test_tshark_ek.py ├── test_tshark_json.py └── test_tshark_xml.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 
18 | 19 | **Versions (please complete the following information):** 20 | - OS: [e.g. Windows/Linux/OSX] 21 | - pyshark version: 22 | - tshark version: [see with tshark --version] 23 | 24 | **Example pcap / packet** 25 | If applicable, add an example pcap file as an attachment, or post the packet as a hex string or a JSON/XML (export packet dissection in wireshark/tshark). 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: run-tests 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | matrix: 8 | python-version: ["3.11", "3.10", "3.9", "3.8", "3.7",] 9 | 10 | steps: 11 | - name: Chckout code 12 | uses: actions/checkout@v3 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | sudo apt update 20 | sudo apt install tshark 21 | python -m pip install --upgrade pip 22 | pip install pytest 23 | pip install -e ./src/ 24 | 25 | - name: Test with pytest 26 | run: | 27 | python -m pytest -v 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Eclipse 3 | ################# 4 | 5 | *.pydevproject 6 | .project 7 | .metadata 8 | bin/ 9 | tmp/ 10 | *.tmp 11 | *.bak 12 | *.swp 13 | *~.nib 14 | local.properties 15 | .classpath 16 | .settings/ 17 | .loadpath 18 | 19 | # External tool builders 20 | .externalToolBuilders/ 21 | 22 | # Locally stored "Eclipse launch configurations" 23 | *.launch 24 | 25 | # CDT-specific 26 | .cproject 27 | 28 | # PDT-specific 29 | .buildpath 30 | 31 | 32 | ################# 33 | ## Visual Studio 34 | ################# 35 | 36 | ## Ignore Visual Studio temporary files, build results, and 37 | ## files generated by popular Visual Studio add-ons. 
38 | 39 | # User-specific files 40 | *.suo 41 | *.user 42 | *.sln.docstates 43 | 44 | # Build results 45 | 46 | [Dd]ebug/ 47 | [Rr]elease/ 48 | x64/ 49 | build/ 50 | [Bb]in/ 51 | [Oo]bj/ 52 | 53 | # MSTest test Results 54 | [Tt]est[Rr]esult*/ 55 | [Bb]uild[Ll]og.* 56 | 57 | *_i.c 58 | *_p.c 59 | *.ilk 60 | *.meta 61 | *.obj 62 | *.pch 63 | *.pdb 64 | *.pgc 65 | *.pgd 66 | *.rsp 67 | *.sbr 68 | *.tlb 69 | *.tli 70 | *.tlh 71 | *.tmp 72 | *.tmp_proj 73 | *.log 74 | *.vspscc 75 | *.vssscc 76 | .builds 77 | *.pidb 78 | *.log 79 | *.scc 80 | 81 | # Visual C++ cache files 82 | ipch/ 83 | *.aps 84 | *.ncb 85 | *.opensdf 86 | *.sdf 87 | *.cachefile 88 | 89 | # Visual Studio profiler 90 | *.psess 91 | *.vsp 92 | *.vspx 93 | 94 | # Guidance Automation Toolkit 95 | *.gpState 96 | 97 | # ReSharper is a .NET coding add-in 98 | _ReSharper*/ 99 | *.[Rr]e[Ss]harper 100 | 101 | # TeamCity is a build add-in 102 | _TeamCity* 103 | 104 | # DotCover is a Code Coverage Tool 105 | *.dotCover 106 | 107 | # NCrunch 108 | *.ncrunch* 109 | .*crunch*.local.xml 110 | 111 | # Installshield output folder 112 | [Ee]xpress/ 113 | 114 | # DocProject is a documentation generator add-in 115 | DocProject/buildhelp/ 116 | DocProject/Help/*.HxT 117 | DocProject/Help/*.HxC 118 | DocProject/Help/*.hhc 119 | DocProject/Help/*.hhk 120 | DocProject/Help/*.hhp 121 | DocProject/Help/Html2 122 | DocProject/Help/html 123 | 124 | # Click-Once directory 125 | publish/ 126 | 127 | # Publish Web Output 128 | *.Publish.xml 129 | *.pubxml 130 | 131 | # NuGet Packages Directory 132 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 133 | #packages/ 134 | 135 | # Windows Azure Build Output 136 | csx 137 | *.build.csdef 138 | 139 | # Windows Store app package directory 140 | AppPackages/ 141 | 142 | # Others 143 | sql/ 144 | *.Cache 145 | ClientBin/ 146 | [Ss]tyle[Cc]op.* 147 | ~$* 148 | *~ 149 | *.dbmdl 150 | *.[Pp]ublish.xml 151 | *.pfx 152 | *.publishsettings 153 | 154 | # RIA/Silverlight 
projects 155 | Generated_Code/ 156 | 157 | # Backup & report files from converting an old project file to a newer 158 | # Visual Studio version. Backup files are not needed, because we have git ;-) 159 | _UpgradeReport_Files/ 160 | Backup*/ 161 | UpgradeLog*.XML 162 | UpgradeLog*.htm 163 | 164 | # SQL Server files 165 | App_Data/*.mdf 166 | App_Data/*.ldf 167 | 168 | ############# 169 | ## Windows detritus 170 | ############# 171 | 172 | # Windows image file caches 173 | Thumbs.db 174 | ehthumbs.db 175 | 176 | # Folder config file 177 | Desktop.ini 178 | 179 | # Recycle Bin used on file shares 180 | $RECYCLE.BIN/ 181 | 182 | # Mac crap 183 | .DS_Store 184 | 185 | 186 | ############# 187 | ## Python 188 | ############# 189 | 190 | *.py[co] 191 | 192 | # Packages 193 | *.egg 194 | *.egg-info 195 | dist/ 196 | build/ 197 | eggs/ 198 | parts/ 199 | var/ 200 | sdist/ 201 | develop-eggs/ 202 | .installed.cfg 203 | 204 | # Installer logs 205 | pip-log.txt 206 | 207 | # Unit test / coverage reports 208 | .coverage 209 | .tox 210 | 211 | #Translations 212 | *.mo 213 | 214 | #Mr Developer 215 | .mr.developer.cfg 216 | 217 | # Pycharm 218 | .idea/ 219 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Dor Green 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the 
Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyshark 2 | 3 | Python wrapper for tshark, allowing python packet parsing using wireshark dissectors. 4 | 5 | Extended documentation: http://kiminewt.github.io/pyshark 6 | 7 | **Looking for contributors** - for various reasons I have a hard time finding time to maintain and enhance the package at the moment. Any pull-requests will be reviewed and if any one is interested and is suitable, I will be happy to include them in the project. Feel free to mail me at dorgreen1 at gmail. 8 | 9 | There are quite a few python packet parsing modules, this one is different because it doesn't actually parse any packets, it simply uses tshark's (wireshark command-line utility) ability to export XMLs to use its parsing. 10 | 11 | This package allows parsing from a capture file or a live capture, using all wireshark dissectors you have installed. 12 | Tested on windows/linux. 13 | 14 | ## Installation 15 | 16 | ### Version support 17 | Python 3.7+ is supported. An unsupported Python 2 version exists as [pyshark-legacy](https://github.com/KimiNewt/pyshark-legacy). 18 | 19 | Supports all modern versions of tshark / wireshark but certain features may be unavailable on older versions. 
20 | 21 | ### All Platforms 22 | Simply run the following to install the latest from pypi 23 | ```bash 24 | pip install pyshark 25 | ``` 26 | 27 | Or install from the git repository: 28 | ```bash 29 | git clone https://github.com/KimiNewt/pyshark.git 30 | cd pyshark/src 31 | python setup.py install 32 | ``` 33 | 34 | 35 | ### Mac OS X 36 | You may have to install libxml which can be unexpected. If you receive an error from clang or an error message about libxml, run the following: 37 | ```bash 38 | xcode-select --install 39 | pip install libxml 40 | ``` 41 | You will probably have to accept a EULA for XCode so be ready to click an "Accept" dialog in the GUI. 42 | 43 | 44 | 45 | ## Usage 46 | 47 | ### Reading from a capture file: 48 | 49 | ```python 50 | >>> import pyshark 51 | >>> cap = pyshark.FileCapture('/tmp/mycapture.cap') 52 | >>> cap 53 | 54 | >>> print(cap[0]) 55 | Packet (Length: 698) 56 | Layer ETH: 57 | Destination: BLANKED 58 | Source: BLANKED 59 | Type: IP (0x0800) 60 | Layer IP: 61 | Version: 4 62 | Header Length: 20 bytes 63 | Differentiated Services Field: 0x00 (DSCP 0x00: Default; ECN: 0x00: Not-ECT (Not ECN-Capable Transport)) 64 | Total Length: 684 65 | Identification: 0x254f (9551) 66 | Flags: 0x00 67 | Fragment offset: 0 68 | Time to live: 1 69 | Protocol: UDP (17) 70 | Header checksum: 0xe148 [correct] 71 | Source: BLANKED 72 | Destination: BLANKED 73 | ... 74 | ``` 75 | 76 | #### Other options 77 | 78 | * **param keep_packets**: Whether to keep packets after reading them via next(). 79 | Used to conserve memory when reading large caps. 80 | * **param input_file**: Either a path or a file-like object containing either a 81 | packet capture file (PCAP, PCAP-NG..) or a TShark xml. 82 | * **param display_filter**: A display (wireshark) filter to apply on the cap 83 | before reading it. 
84 | * **param only_summaries**: Only produce packet summaries, much faster but includes 85 | very little information 86 | * **param disable_protocol**: Disable detection of a protocol (tshark > version 2) 87 | * **param decryption_key**: Key used to encrypt and decrypt captured traffic. 88 | * **param encryption_type**: Standard of encryption used in captured traffic (must 89 | be either 'WEP', 'WPA-PWD', or 'WPA-PWK'. Defaults to WPA-PWD). 90 | * **param tshark_path**: Path of the tshark binary 91 | 92 | ### Reading from a live interface: 93 | 94 | ```python 95 | >>> capture = pyshark.LiveCapture(interface='eth0') 96 | >>> capture.sniff(timeout=50) 97 | >>> capture 98 | 99 | >>> capture[3] 100 | 101 | 102 | for packet in capture.sniff_continuously(packet_count=5): 103 | print('Just arrived:', packet) 104 | ``` 105 | 106 | #### Other options 107 | 108 | * **param interface**: Name of the interface to sniff on. If not given, takes 109 | the first available. 110 | * **param bpf_filter**: BPF filter to use on packets. 111 | * **param display_filter**: Display (wireshark) filter to use. 112 | * **param only_summaries**: Only produce packet summaries, much faster but 113 | includes very little information 114 | * **param disable_protocol**: Disable detection of a protocol (tshark > version 2) 115 | * **param decryption_key**: Key used to encrypt and decrypt captured traffic. 116 | * **param encryption_type**: Standard of encryption used in captured traffic 117 | (must be either 'WEP', 'WPA-PWD', or 'WPA-PWK'. Defaults to WPA-PWD). 118 | * **param tshark_path**: Path of the tshark binary 119 | * **param output_file**: Additionally save captured packets to this file. 
120 | 121 | ### Reading from a live interface using a ring buffer 122 | ```python 123 | >>> capture = pyshark.LiveRingCapture(interface='eth0') 124 | >>> capture.sniff(timeout=50) 125 | >>> capture 126 | 127 | >>> capture[3] 128 | 129 | 130 | for packet in capture.sniff_continuously(packet_count=5): 131 | print('Just arrived:', packet) 132 | ``` 133 | 134 | #### Other options 135 | * **param ring_file_size**: Size of the ring file in kB, default is 1024 136 | * **param num_ring_files**: Number of ring files to keep, default is 1 137 | * **param ring_file_name**: Name of the ring file, default is /tmp/pyshark.pcap 138 | * **param interface**: Name of the interface to sniff on. If not given, takes 139 | the first available. 140 | * **param bpf_filter**: BPF filter to use on packets. 141 | * **param display_filter**: Display (wireshark) filter to use. 142 | * **param only_summaries**: Only produce packet summaries, much faster but 143 | includes very little information 144 | * **param disable_protocol**: Disable detection of a protocol (tshark > version 2) 145 | * **param decryption_key**: Key used to encrypt and decrypt captured traffic. 146 | * **param encryption_type**: Standard of encryption used in captured traffic 147 | (must be either 'WEP', 'WPA-PWD', or 'WPA-PWK'. Defaults to WPA-PWD). 148 | * **param tshark_path**: Path of the tshark binary 149 | * **param output_file**: Additionally save captured packets to this file. 150 | 151 | ### Reading from a live remote interface: 152 | 153 | ```python 154 | >>> capture = pyshark.RemoteCapture('192.168.1.101', 'eth0') 155 | >>> capture.sniff(timeout=50) 156 | >>> capture 157 | ``` 158 | 159 | #### Other options 160 | 161 | * **param remote_host**: The remote host to capture on (IP or hostname). 162 | Should be running rpcapd. 163 | * **param remote_interface**: The remote interface on the remote machine to 164 | capture on. Note that on windows it is not the device display name but the 165 | true interface name (i.e. 
\\Device\\NPF_..). 166 | * **param remote_port**: The remote port the rpcapd service is listening on 167 | * **param bpf_filter**: A BPF (tcpdump) filter to apply on the cap before 168 | reading. 169 | * **param only_summaries**: Only produce packet summaries, much faster but 170 | includes very little information 171 | * **param disable_protocol**: Disable detection of a protocol (tshark > version 2) 172 | * **param decryption_key**: Key used to encrypt and decrypt captured traffic. 173 | * **param encryption_type**: Standard of encryption used in captured traffic 174 | (must be either 'WEP', 'WPA-PWD', or 'WPA-PWK'. Defaults to WPA-PWD). 175 | * **param tshark_path**: Path of the tshark binary 176 | 177 | ### Accessing packet data: 178 | 179 | Data can be accessed in multiple ways. 180 | Packets are divided into layers, first you have to reach the appropriate layer and then you can select your field. 181 | 182 | All of the following work: 183 | 184 | ```python 185 | >>> packet['ip'].dst 186 | 192.168.0.1 187 | >>> packet.ip.src 188 | 192.168.0.100 189 | >>> packet[2].src 190 | 192.168.0.100 191 | ``` 192 | 193 | To test whether a layer is in a packet, you can use its name: 194 | 195 | ```python 196 | >>> 'IP' in packet 197 | True 198 | ``` 199 | 200 | To see all possible field names, use the `packet.layer.field_names` attribute (i.e. `packet.ip.field_names`) or the autocomplete function on your interpreter. 201 | 202 | You can also get the original binary data of a field, or a pretty description of it: 203 | 204 | ```python 205 | >>> p.ip.addr.showname 206 | Source or Destination Address: 10.0.0.10 (10.0.0.10) 207 | # And some new attributes as well: 208 | >>> p.ip.addr.int_value 209 | 167772170 210 | >>> p.ip.addr.binary_value 211 | b'\n\x00\x00\n' 212 | ``` 213 | 214 | 215 | ### Decrypting packet captures 216 | 217 | Pyshark supports automatic decryption of traces using the WEP, WPA-PWD, and WPA-PSK standards (WPA-PWD is the default). 
218 | 219 | ```python 220 | >>> cap1 = pyshark.FileCapture('/tmp/capture1.cap', decryption_key='password') 221 | >>> cap2 = pyshark.LiveCapture(interface='wi0', decryption_key='password', encryption_type='wpa-psk') 222 | ``` 223 | 224 | A tuple of supported encryption standards, SUPPORTED_ENCRYPTION_STANDARDS, 225 | exists in each capture class. 226 | 227 | ```python 228 | >>> pyshark.FileCapture.SUPPORTED_ENCRYPTION_STANDARDS 229 | ('wep', 'wpa-pwd', 'wpa-psk') 230 | >>> pyshark.LiveCapture.SUPPORTED_ENCRYPTION_STANDARDS 231 | ('wep', 'wpa-pwd', 'wpa-psk') 232 | ``` 233 | 234 | ### Reading from a file using a display filter 235 | 236 | Pyshark display filters can be helpful in analyzing application focused traffic. 237 | BPF filters do not offer as much flexibility as Wireshark's display filters. 238 | 239 | ```python 240 | >>> cap1 = pyshark.FileCapture('/tmp/capture1.cap', display_filter="dns") 241 | >>> cap2 = pyshark.LiveCapture(interface='en0', display_filter="tcp.analysis.retransmission") 242 | ``` 243 | ## License 244 | This project is licensed under MIT. Contributions to this project are accepted under the same license. 245 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Development requirements. Only required for testing 2 | appdirs 3 | py 4 | pytest 5 | lxml 6 | packaging 7 | -------------------------------------------------------------------------------- /src/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyshark/config.ini -------------------------------------------------------------------------------- /src/README.txt: -------------------------------------------------------------------------------- 1 | Python wrapper for tshark, allowing python packet parsing using wireshark dissectors. 2 | 3 | See https://github.com/KimiNewt/pyshark/ for documentation. 
4 | -------------------------------------------------------------------------------- /src/pyshark/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | class UnsupportedVersionException(Exception): 5 | pass 6 | 7 | 8 | if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 5): 9 | raise UnsupportedVersionException("Your version of Python is unsupported. " 10 | "Pyshark requires Python >= 3.5 & Wireshark >= 2.2.0. " 11 | " Please upgrade or use pyshark-legacy, or pyshark version 0.3.8") 12 | 13 | from pyshark.capture.live_capture import LiveCapture 14 | from pyshark.capture.live_ring_capture import LiveRingCapture 15 | from pyshark.capture.file_capture import FileCapture 16 | from pyshark.capture.remote_capture import RemoteCapture 17 | from pyshark.capture.inmem_capture import InMemCapture 18 | from pyshark.capture.pipe_capture import PipeCapture -------------------------------------------------------------------------------- /src/pyshark/cache.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import shutil 3 | 4 | import appdirs 5 | 6 | 7 | def get_cache_dir(tshark_version) -> pathlib.Path: 8 | cache_dir = pathlib.Path(appdirs.user_cache_dir(appname="pyshark", version=tshark_version)) 9 | if not cache_dir.exists(): 10 | cache_dir.mkdir(parents=True) 11 | return cache_dir 12 | 13 | 14 | def clear_cache(tshark_version=None): 15 | shutil.rmtree(get_cache_dir(tshark_version)) 16 | -------------------------------------------------------------------------------- /src/pyshark/capture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KimiNewt/pyshark/4517bdfed7a65ac9a6a31354f086e75ddd5dac3b/src/pyshark/capture/__init__.py -------------------------------------------------------------------------------- /src/pyshark/capture/capture.py: 
-------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextlib 3 | import inspect 4 | import os 5 | import threading 6 | import subprocess 7 | import concurrent.futures 8 | import sys 9 | import logging 10 | import warnings 11 | 12 | from pyshark import ek_field_mapping 13 | from pyshark.packet.packet import Packet 14 | from pyshark.tshark.output_parser import tshark_ek 15 | from pyshark.tshark.output_parser import tshark_json 16 | from pyshark.tshark.output_parser import tshark_xml 17 | from pyshark.tshark.tshark import get_process_path, get_tshark_display_filter_flag, \ 18 | tshark_supports_json, TSharkVersionException, get_tshark_version, tshark_supports_duplicate_keys 19 | 20 | 21 | if sys.version_info < (3, 8): 22 | asyncTimeoutError = concurrent.futures.TimeoutError 23 | else: 24 | asyncTimeoutError = asyncio.exceptions.TimeoutError 25 | 26 | 27 | class TSharkCrashException(Exception): 28 | pass 29 | 30 | 31 | class UnknownEncryptionStandardException(Exception): 32 | pass 33 | 34 | 35 | class RawMustUseJsonException(Exception): 36 | """If the use_raw argument is True, so should the use_json argument""" 37 | 38 | 39 | class StopCapture(Exception): 40 | """Exception that the user can throw anywhere in packet-handling to stop the capture process.""" 41 | pass 42 | 43 | 44 | class Capture: 45 | """Base class for packet captures.""" 46 | SUMMARIES_BATCH_SIZE = 64 47 | DEFAULT_LOG_LEVEL = logging.CRITICAL 48 | SUPPORTED_ENCRYPTION_STANDARDS = ["wep", "wpa-pwk", "wpa-pwd", "wpa-psk"] 49 | 50 | def __init__(self, display_filter=None, only_summaries=False, eventloop=None, 51 | decryption_key=None, encryption_type="wpa-pwd", output_file=None, 52 | decode_as=None, disable_protocol=None, tshark_path=None, 53 | override_prefs=None, capture_filter=None, use_json=False, include_raw=False, 54 | use_ek=False, custom_parameters=None, debug=False): 55 | 56 | self.loaded = False 57 | self.tshark_path = tshark_path 58 | 
self._override_prefs = override_prefs 59 | self.debug = debug 60 | self.use_json = use_json 61 | self._use_ek = use_ek 62 | self.include_raw = include_raw 63 | self._packets = [] 64 | self._current_packet = 0 65 | self._display_filter = display_filter 66 | self._capture_filter = capture_filter 67 | self._only_summaries = only_summaries 68 | self._output_file = output_file 69 | self._running_processes = set() 70 | self._decode_as = decode_as 71 | self._disable_protocol = disable_protocol 72 | self._log = logging.Logger( 73 | self.__class__.__name__, level=self.DEFAULT_LOG_LEVEL) 74 | self._closed = False 75 | self._custom_parameters = custom_parameters 76 | self._eof_reached = False 77 | self._last_error_line = None 78 | self._stderr_handling_tasks = [] 79 | self.__tshark_version = None 80 | 81 | if include_raw and not (use_json or use_ek): 82 | raise RawMustUseJsonException( 83 | "use_json/use_ek must be True if include_raw") 84 | 85 | if self.debug: 86 | self.set_debug() 87 | 88 | self.eventloop = eventloop 89 | if self.eventloop is None: 90 | self._setup_eventloop() 91 | if encryption_type and encryption_type.lower() in self.SUPPORTED_ENCRYPTION_STANDARDS: 92 | self.encryption = (decryption_key, encryption_type.lower()) 93 | else: 94 | standards = ", ".join(self.SUPPORTED_ENCRYPTION_STANDARDS) 95 | raise UnknownEncryptionStandardException(f"Only the following standards are supported: {standards}.") 96 | 97 | def __getitem__(self, item): 98 | """Gets the packet in the given index. 99 | 100 | :param item: packet index 101 | :return: Packet object. 
102 | """ 103 | return self._packets[item] 104 | 105 | def __len__(self): 106 | return len(self._packets) 107 | 108 | def next(self) -> Packet: 109 | return self.next_packet() 110 | 111 | # Allows for child classes to call next() from super() without 2to3 "fixing" 112 | # the call 113 | def next_packet(self) -> Packet: 114 | if self._current_packet >= len(self._packets): 115 | raise StopIteration() 116 | cur_packet = self._packets[self._current_packet] 117 | self._current_packet += 1 118 | return cur_packet 119 | 120 | def clear(self): 121 | """Empties the capture of any saved packets.""" 122 | self._packets = [] 123 | self._current_packet = 0 124 | 125 | def reset(self): 126 | """Starts iterating packets from the first one.""" 127 | self._current_packet = 0 128 | 129 | def load_packets(self, packet_count=0, timeout=None): 130 | """Reads the packets from the source (cap, interface, etc.) and adds it to the internal list. 131 | 132 | If 0 as the packet_count is given, reads forever 133 | 134 | :param packet_count: The amount of packets to add to the packet list (0 to read forever) 135 | :param timeout: If given, automatically stops after a given amount of time. 
136 | """ 137 | initial_packet_amount = len(self._packets) 138 | 139 | def keep_packet(pkt): 140 | self._packets.append(pkt) 141 | 142 | if packet_count != 0 and len(self._packets) - initial_packet_amount >= packet_count: 143 | raise StopCapture() 144 | 145 | try: 146 | self.apply_on_packets( 147 | keep_packet, timeout=timeout, packet_count=packet_count) 148 | self.loaded = True 149 | except asyncTimeoutError: 150 | pass 151 | 152 | def set_debug(self, set_to=True, log_level=logging.DEBUG): 153 | """Sets the capture to debug mode (or turns it off if specified).""" 154 | if set_to: 155 | handler = logging.StreamHandler(sys.stdout) 156 | handler.setFormatter(logging.Formatter( 157 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s")) 158 | self._log.addHandler(handler) 159 | self._log.level = log_level 160 | self.debug = set_to 161 | 162 | def _verify_capture_parameters(self): 163 | """Optionally verify that the capture's parameters are valid. 164 | 165 | Should raise an exception if they are not valid. 166 | """ 167 | pass 168 | 169 | def _setup_eventloop(self): 170 | """Sets up a new eventloop as the current one according to the OS.""" 171 | if os.name == "nt": 172 | current_eventloop = asyncio.get_event_loop_policy().get_event_loop() 173 | if isinstance(current_eventloop, asyncio.ProactorEventLoop): 174 | self.eventloop = current_eventloop 175 | else: 176 | # On Python before 3.8, Proactor is not the default eventloop type, so we have to create a new one. 177 | # If there was an existing eventloop this can create issues, since we effectively disable it here. 178 | if asyncio.all_tasks(): 179 | warnings.warn("The running eventloop has tasks but pyshark must set a new eventloop to continue. 
" 180 | "Existing tasks may not run.") 181 | self.eventloop = asyncio.ProactorEventLoop() 182 | asyncio.set_event_loop(self.eventloop) 183 | else: 184 | try: 185 | self.eventloop = asyncio.get_event_loop_policy().get_event_loop() 186 | except RuntimeError: 187 | if threading.current_thread() != threading.main_thread(): 188 | # Ran not in main thread, make a new eventloop 189 | self.eventloop = asyncio.new_event_loop() 190 | asyncio.set_event_loop(self.eventloop) 191 | else: 192 | raise 193 | if os.name == "posix" and isinstance(threading.current_thread(), threading._MainThread): 194 | # The default child watchers (ThreadedChildWatcher) attach_loop method is empty! 195 | # While using pyshark with ThreadedChildWatcher, asyncio could raise a ChildProcessError 196 | # "Unknown child process pid %d, will report returncode 255" 197 | # This led to a TSharkCrashException in _cleanup_subprocess. 198 | # Using the SafeChildWatcher fixes this issue, but it is slower. 199 | # SafeChildWatcher O(n) -> large numbers of processes are slow 200 | # ThreadedChildWatcher O(1) -> independent of process number 201 | # asyncio.get_child_watcher().attach_loop(self.eventloop) 202 | asyncio.set_child_watcher(asyncio.SafeChildWatcher()) 203 | asyncio.get_child_watcher().attach_loop(self.eventloop) 204 | 205 | def _packets_from_tshark_sync(self, packet_count=None, existing_process=None): 206 | """Returns a generator of packets. 207 | 208 | This is the sync version of packets_from_tshark. It wait for the completion of each coroutine and 209 | reimplements reading packets in a sync way, yielding each packet as it arrives. 210 | 211 | :param packet_count: If given, stops after this amount of packets is captured. 
212 | """ 213 | # NOTE: This has code duplication with the async version, think about how to solve this 214 | tshark_process = existing_process or self.eventloop.run_until_complete( 215 | self._get_tshark_process()) 216 | parser = self._setup_tshark_output_parser() 217 | packets_captured = 0 218 | 219 | data = b"" 220 | try: 221 | while True: 222 | try: 223 | packet, data = self.eventloop.run_until_complete( 224 | parser.get_packets_from_stream(tshark_process.stdout, data, 225 | got_first_packet=packets_captured > 0)) 226 | 227 | except EOFError: 228 | self._log.debug("EOF reached (sync)") 229 | self._eof_reached = True 230 | break 231 | 232 | if packet: 233 | packets_captured += 1 234 | yield packet 235 | if packet_count and packets_captured >= packet_count: 236 | break 237 | finally: 238 | if tshark_process in self._running_processes: 239 | self.eventloop.run_until_complete( 240 | self._cleanup_subprocess(tshark_process)) 241 | 242 | def apply_on_packets(self, callback, timeout=None, packet_count=None): 243 | """Runs through all packets and calls the given callback (a function) with each one as it is read. 244 | 245 | If the capture is infinite (i.e. a live capture), it will run forever, otherwise it will complete after all 246 | packets have been read. 247 | 248 | Example usage: 249 | def print_callback(pkt): 250 | print(pkt) 251 | capture.apply_on_packets(print_callback) 252 | 253 | If a timeout is given, raises a Timeout error if not complete before the timeout (in seconds) 254 | """ 255 | coro = self.packets_from_tshark(callback, packet_count=packet_count) 256 | if timeout is not None: 257 | coro = asyncio.wait_for(coro, timeout) 258 | return self.eventloop.run_until_complete(coro) 259 | 260 | async def packets_from_tshark(self, packet_callback, packet_count=None, close_tshark=True): 261 | """ 262 | A coroutine which creates a tshark process, runs the given callback on each packet that is received from it and 263 | closes the process when it is done. 
264 | 265 | Do not use interactively. Can be used in order to insert packets into your own eventloop. 266 | """ 267 | tshark_process = await self._get_tshark_process(packet_count=packet_count) 268 | try: 269 | await self._go_through_packets_from_fd(tshark_process.stdout, packet_callback, packet_count=packet_count) 270 | except StopCapture: 271 | pass 272 | finally: 273 | if close_tshark: 274 | await self.close_async() 275 | 276 | async def _go_through_packets_from_fd(self, fd, packet_callback, packet_count=None): 277 | """A coroutine which goes through a stream and calls a given callback for each XML packet seen in it.""" 278 | packets_captured = 0 279 | self._log.debug("Starting to go through packets") 280 | 281 | parser = self._setup_tshark_output_parser() 282 | data = b"" 283 | 284 | while True: 285 | try: 286 | packet, data = await parser.get_packets_from_stream(fd, data, 287 | got_first_packet=packets_captured > 0) 288 | except EOFError: 289 | self._log.debug("EOF reached") 290 | self._eof_reached = True 291 | break 292 | 293 | if packet: 294 | packets_captured += 1 295 | try: 296 | if inspect.iscoroutinefunction(packet_callback): 297 | await packet_callback(packet) 298 | else: 299 | packet_callback(packet) 300 | except StopCapture: 301 | self._log.debug("User-initiated capture stop in callback") 302 | break 303 | 304 | if packet_count and packets_captured >= packet_count: 305 | break 306 | 307 | def _create_stderr_handling_task(self, stderr): 308 | self._stderr_handling_tasks.append(asyncio.ensure_future(self._handle_process_stderr_forever(stderr))) 309 | 310 | async def _handle_process_stderr_forever(self, stderr): 311 | while True: 312 | stderr_line = await stderr.readline() 313 | if not stderr_line: 314 | break 315 | stderr_line = stderr_line.decode().strip() 316 | self._last_error_line = stderr_line 317 | self._log.debug(stderr_line) 318 | 319 | def _get_tshark_path(self): 320 | return get_process_path(self.tshark_path) 321 | 322 | def 
_get_tshark_version(self): 323 | if self.__tshark_version is None: 324 | self.__tshark_version = get_tshark_version(self.tshark_path) 325 | return self.__tshark_version 326 | 327 | async def _get_tshark_process(self, packet_count=None, stdin=None): 328 | """Returns a new tshark process with previously-set parameters.""" 329 | self._verify_capture_parameters() 330 | 331 | output_parameters = [] 332 | if self.use_json or self._use_ek: 333 | if not tshark_supports_json(self._get_tshark_version()): 334 | raise TSharkVersionException( 335 | "JSON only supported on Wireshark >= 2.2.0") 336 | 337 | if self.use_json: 338 | output_type = "json" 339 | if tshark_supports_duplicate_keys(self._get_tshark_version()): 340 | output_parameters.append("--no-duplicate-keys") 341 | elif self._use_ek: 342 | output_type = "ek" 343 | else: 344 | output_type = "psml" if self._only_summaries else "pdml" 345 | parameters = [self._get_tshark_path(), "-l", "-n", "-T", output_type] + \ 346 | self.get_parameters(packet_count=packet_count) + output_parameters 347 | 348 | self._log.debug( 349 | "Creating TShark subprocess with parameters: " + " ".join(parameters)) 350 | self._log.debug("Executable: %s", parameters[0]) 351 | tshark_process = await asyncio.create_subprocess_exec(*parameters, 352 | stdout=subprocess.PIPE, 353 | stderr=subprocess.PIPE, 354 | stdin=stdin) 355 | self._create_stderr_handling_task(tshark_process.stderr) 356 | self._created_new_process(parameters, tshark_process) 357 | return tshark_process 358 | 359 | def _created_new_process(self, parameters, process, process_name="TShark"): 360 | self._log.debug( 361 | process_name + f" subprocess (pid {process.pid}) created") 362 | if process.returncode is not None and process.returncode != 0: 363 | raise TSharkCrashException( 364 | f"{process_name} seems to have crashed. Try updating it. 
(command ran: '{' '.join(parameters)}')") 365 | self._running_processes.add(process) 366 | 367 | async def _cleanup_subprocess(self, process): 368 | """Kill the given process and properly closes any pipes connected to it.""" 369 | self._log.debug(f"Cleanup Subprocess (pid {process.pid})") 370 | if process.returncode is None: 371 | try: 372 | process.kill() 373 | return await asyncio.wait_for(process.wait(), 1) 374 | except asyncTimeoutError: 375 | self._log.debug( 376 | "Waiting for process to close failed, may have zombie process.") 377 | except ProcessLookupError: 378 | pass 379 | except OSError: 380 | if os.name != "nt": 381 | raise 382 | elif process.returncode > 0: 383 | if process.returncode != 1 or self._eof_reached: 384 | raise TSharkCrashException(f"TShark (pid {process.pid}) seems to have crashed (retcode: {process.returncode}).\n" 385 | f"Last error line: {self._last_error_line}\n" 386 | "Try rerunning in debug mode [ capture_obj.set_debug() ] or try updating tshark.") 387 | 388 | def _setup_tshark_output_parser(self): 389 | if self.use_json: 390 | return tshark_json.TsharkJsonParser(self._get_tshark_version()) 391 | if self._use_ek: 392 | ek_field_mapping.MAPPING.load_mapping(str(self._get_tshark_version()), 393 | tshark_path=self.tshark_path) 394 | return tshark_ek.TsharkEkJsonParser() 395 | return tshark_xml.TsharkXmlParser(parse_summaries=self._only_summaries) 396 | 397 | def close(self): 398 | self.eventloop.run_until_complete(self.close_async()) 399 | 400 | async def close_async(self): 401 | for process in self._running_processes.copy(): 402 | await self._cleanup_subprocess(process) 403 | self._running_processes.clear() 404 | 405 | # Wait for all stderr handling to finish 406 | for task in self._stderr_handling_tasks: 407 | task.cancel() 408 | with contextlib.suppress(asyncio.CancelledError): 409 | await task 410 | 411 | def __del__(self): 412 | if self._running_processes: 413 | self.close() 414 | 415 | def __enter__(self): return self 416 | async 
def __aenter__(self): return self 417 | def __exit__(self, exc_type, exc_val, exc_tb): self.close() 418 | 419 | async def __aexit__(self, exc_type, exc_val, 420 | exc_tb): await self.close_async() 421 | 422 | def get_parameters(self, packet_count=None): 423 | """Returns the special tshark parameters to be used according to the configuration of this class.""" 424 | params = [] 425 | if self._capture_filter: 426 | params += ["-f", self._capture_filter] 427 | if self._display_filter: 428 | params += [get_tshark_display_filter_flag(self._get_tshark_version(),), 429 | self._display_filter] 430 | # Raw is only enabled when JSON is also enabled. 431 | if self.include_raw: 432 | params += ["-x"] 433 | if packet_count: 434 | params += ["-c", str(packet_count)] 435 | 436 | if self._custom_parameters: 437 | if isinstance(self._custom_parameters, list): 438 | params += self._custom_parameters 439 | elif isinstance(self._custom_parameters, dict): 440 | for key, val in self._custom_parameters.items(): 441 | params += [key, val] 442 | else: 443 | raise TypeError("Custom parameters type not supported.") 444 | 445 | if all(self.encryption): 446 | params += ["-o", "wlan.enable_decryption:TRUE", "-o", 'uat:80211_keys:"' + self.encryption[1] + '","' + 447 | self.encryption[0] + '"'] 448 | if self._override_prefs: 449 | for preference_name, preference_value in self._override_prefs.items(): 450 | if all(self.encryption) and preference_name in ("wlan.enable_decryption", "uat:80211_keys"): 451 | continue # skip if override preferences also given via --encryption options 452 | params += ["-o", f"{preference_name}:{preference_value}"] 453 | 454 | if self._output_file: 455 | params += ["-w", self._output_file] 456 | 457 | if self._decode_as: 458 | for criterion, decode_as_proto in self._decode_as.items(): 459 | params += ["-d", 460 | ",".join([criterion.strip(), decode_as_proto.strip()])] 461 | 462 | if self._disable_protocol: 463 | params += ["--disable-protocol", 
self._disable_protocol.strip()] 464 | 465 | return params 466 | 467 | def __iter__(self): 468 | if self.loaded: 469 | return iter(self._packets) 470 | else: 471 | return self._packets_from_tshark_sync() 472 | 473 | def __repr__(self): 474 | return f"<{self.__class__.__name__} ({len(self._packets)} packets)>" 475 | -------------------------------------------------------------------------------- /src/pyshark/capture/file_capture.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | from pyshark.capture.capture import Capture 4 | from pyshark.packet.packet import Packet 5 | 6 | 7 | class FileCapture(Capture): 8 | """A class representing a capture read from a file.""" 9 | 10 | def __init__(self, input_file=None, keep_packets=True, display_filter=None, only_summaries=False, 11 | decryption_key=None, encryption_type="wpa-pwk", decode_as=None, 12 | disable_protocol=None, tshark_path=None, override_prefs=None, 13 | use_json=False, use_ek=False, 14 | output_file=None, include_raw=False, eventloop=None, custom_parameters=None, 15 | debug=False): 16 | """Creates a packet capture object by reading from file. 17 | 18 | :param keep_packets: Whether to keep packets after reading them via next(). Used to conserve memory when reading 19 | large caps (can only be used along with the "lazy" option!) 20 | :param input_file: File path of the capture (PCAP, PCAPNG) 21 | :param display_filter: A display (wireshark) filter to apply on the cap before reading it. 22 | :param only_summaries: Only produce packet summaries, much faster but includes very little information. 23 | :param decryption_key: Optional key used to encrypt and decrypt captured traffic. 24 | :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', or 25 | 'WPA-PWK'. Defaults to WPA-PWK). 
26 | :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark 27 | to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make 28 | it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details. 29 | :param tshark_path: Path of the tshark binary 30 | :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}. 31 | :param disable_protocol: Tells tshark to remove a dissector for a specific protocol. 32 | :param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data. 33 | :param use_json: DEPRECATED. Use use_ek instead. 34 | :param output_file: A string of a file to write every read packet into (useful when filtering). 35 | :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"} 36 | or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"]. 37 | """ 38 | super(FileCapture, self).__init__(display_filter=display_filter, only_summaries=only_summaries, 39 | decryption_key=decryption_key, encryption_type=encryption_type, 40 | decode_as=decode_as, disable_protocol=disable_protocol, 41 | tshark_path=tshark_path, override_prefs=override_prefs, 42 | use_json=use_json, use_ek=use_ek, output_file=output_file, 43 | include_raw=include_raw, eventloop=eventloop, 44 | custom_parameters=custom_parameters, debug=debug) 45 | self.input_filepath = pathlib.Path(input_file) 46 | if not self.input_filepath.exists(): 47 | raise FileNotFoundError(f"[Errno 2] No such file or directory: {self.input_filepath}") 48 | if not self.input_filepath.is_file(): 49 | raise FileNotFoundError(f"{self.input_filepath} is a directory") 50 | 51 | self.keep_packets = keep_packets 52 | self._packet_generator = self._packets_from_tshark_sync() 53 | 54 | def next(self) -> Packet: 55 | """Returns the next packet in the cap. 
56 | 57 | If the capture's keep_packets flag is True, will also keep it in the internal packet list. 58 | """ 59 | if not self.keep_packets: 60 | return self._packet_generator.send(None) 61 | elif self._current_packet >= len(self._packets): 62 | packet = self._packet_generator.send(None) 63 | self._packets += [packet] 64 | return super(FileCapture, self).next_packet() 65 | 66 | def __getitem__(self, packet_index): 67 | if not self.keep_packets: 68 | raise NotImplementedError("Cannot use getitem if packets are not kept") 69 | # We may not yet have this packet 70 | while packet_index >= len(self._packets): 71 | try: 72 | self.next() 73 | except StopIteration: 74 | # We read the whole file, and there's still not such packet. 75 | raise KeyError(f"Packet of index {packet_index} does not exist in capture") 76 | return super(FileCapture, self).__getitem__(packet_index) 77 | 78 | def get_parameters(self, packet_count=None): 79 | return super(FileCapture, self).get_parameters(packet_count=packet_count) + [ 80 | "-r", self.input_filepath.as_posix()] 81 | 82 | def _verify_capture_parameters(self): 83 | try: 84 | with self.input_filepath.open("rb"): 85 | pass 86 | except PermissionError: 87 | raise PermissionError(f"Permission denied for file {self.input_filepath}") 88 | 89 | def __repr__(self): 90 | if self.keep_packets: 91 | return f"<{self.__class__.__name__} {self.input_filepath.as_posix()}>" 92 | else: 93 | return f"<{self.__class__.__name__} {self.input_filepath.as_posix()} ({len(self._packets)} packets)>" 94 | -------------------------------------------------------------------------------- /src/pyshark/capture/inmem_capture.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | import itertools 4 | import subprocess 5 | import os 6 | import struct 7 | import time 8 | import warnings 9 | from packaging import version 10 | 11 | from pyshark.capture.capture import Capture, StopCapture 12 | 13 | 
DEFAULT_TIMEOUT = 30 14 | 15 | 16 | class LinkTypes(object): 17 | NULL = 0 18 | ETHERNET = 1 19 | IEEE802_5 = 6 20 | PPP = 9 21 | IEEE802_11 = 105 22 | 23 | 24 | class InMemCapture(Capture): 25 | 26 | def __init__(self, bpf_filter=None, display_filter=None, only_summaries=False, 27 | decryption_key=None, encryption_type='wpa-pwk', decode_as=None, 28 | disable_protocol=None, tshark_path=None, override_prefs=None, use_json=False, use_ek=False, 29 | linktype=LinkTypes.ETHERNET, include_raw=False, eventloop=None, custom_parameters=None, 30 | debug=False): 31 | """Creates a new in-mem capture, a capture capable of receiving binary packets and parsing them using tshark. 32 | 33 | Significantly faster if packets are added in a batch. 34 | 35 | :param bpf_filter: BPF filter to use on packets. 36 | :param display_filter: Display (wireshark) filter to use. 37 | :param only_summaries: Only produce packet summaries, much faster but includes very little information 38 | :param decryption_key: Key used to encrypt and decrypt captured traffic. 39 | :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', 40 | or 'WPA-PWK'. Defaults to WPA-PWK). 41 | :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark 42 | to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make 43 | it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details. 44 | :param tshark_path: Path of the tshark binary 45 | :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}. 46 | :param disable_protocol: Tells tshark to remove a dissector for a specifc protocol. 47 | :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"} 48 | or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"]. 
49 | """ 50 | super(InMemCapture, self).__init__(display_filter=display_filter, only_summaries=only_summaries, 51 | decryption_key=decryption_key, encryption_type=encryption_type, 52 | decode_as=decode_as, disable_protocol=disable_protocol, 53 | tshark_path=tshark_path, override_prefs=override_prefs, 54 | use_json=use_json, use_ek=use_ek, 55 | include_raw=include_raw, eventloop=eventloop, 56 | custom_parameters=custom_parameters, debug=debug) 57 | self.bpf_filter = bpf_filter 58 | self._packets_to_write = None 59 | self._current_linktype = linktype 60 | self._current_tshark = None 61 | 62 | def get_parameters(self, packet_count=None): 63 | """Returns the special tshark parameters to be used according to the configuration of this class.""" 64 | params = super(InMemCapture, self).get_parameters( 65 | packet_count=packet_count) 66 | params += ['-i', '-'] 67 | return params 68 | 69 | async def _get_tshark_process(self, packet_count=None): 70 | if self._current_tshark: 71 | return self._current_tshark 72 | proc = await super(InMemCapture, self)._get_tshark_process(packet_count=packet_count, stdin=subprocess.PIPE) 73 | self._current_tshark = proc 74 | 75 | # Create PCAP header 76 | header = struct.pack("IHHIIII", 0xa1b2c3d4, 2, 4, 77 | 0, 0, 0x7fff, self._current_linktype) 78 | proc.stdin.write(header) 79 | 80 | return proc 81 | 82 | def _get_json_separators(self): 83 | """"Returns the separators between packets in a JSON output 84 | 85 | Returns a tuple of (packet_separator, end_of_file_separator, characters_to_disregard). 86 | The latter variable being the number of characters to ignore in order to pass the packet (i.e. extra newlines, 87 | commas, parenthesis). 
88 | """ 89 | if self._get_tshark_version() >= version.parse("2.6.7"): 90 | return f"{os.linesep} }}".encode(), f"}}{os.linesep}]".encode(), 0 91 | else: 92 | return f'}}{os.linesep}{os.linesep}'.encode(), f"}}{os.linesep}{os.linesep}]", 1 93 | 94 | def _write_packet(self, packet, sniff_time): 95 | if sniff_time is None: 96 | now = time.time() 97 | elif isinstance(sniff_time, datetime.datetime): 98 | now = sniff_time.timestamp() 99 | else: 100 | now = float(sniff_time) 101 | secs = int(now) 102 | usecs = int((now * 1000000) % 1000000) 103 | # Write packet header 104 | self._current_tshark.stdin.write(struct.pack( 105 | "IIII", secs, usecs, len(packet), len(packet))) 106 | self._current_tshark.stdin.write(packet) 107 | 108 | def parse_packet(self, binary_packet, sniff_time=None, timeout=DEFAULT_TIMEOUT): 109 | """Parses a single binary packet and returns its parsed version. 110 | 111 | DOES NOT CLOSE tshark. It must be closed manually by calling close() when you're done 112 | working with it. 113 | Use parse_packets when parsing multiple packets for faster parsing 114 | """ 115 | if sniff_time is not None: 116 | sniff_time = [sniff_time] 117 | return self.parse_packets([binary_packet], sniff_time, timeout)[0] 118 | 119 | def parse_packets(self, binary_packets, sniff_times=None, timeout=DEFAULT_TIMEOUT): 120 | """Parses binary packets and return a list of parsed packets. 121 | 122 | DOES NOT CLOSE tshark. It must be closed manually by calling close() when you're done 123 | working with it. 124 | """ 125 | if self.eventloop is None: 126 | self._setup_eventloop() 127 | return self.eventloop.run_until_complete(self.parse_packets_async(binary_packets, sniff_times, timeout)) 128 | 129 | async def parse_packets_async(self, binary_packets, sniff_times=None, timeout=DEFAULT_TIMEOUT): 130 | """A coroutine which parses binary packets and return a list of parsed packets. 131 | 132 | DOES NOT CLOSE tshark. 
It must be closed manually by calling close() when you're done 133 | working with it. 134 | """ 135 | parsed_packets = [] 136 | if sniff_times is None: 137 | sniff_times = [] 138 | if not self._current_tshark: 139 | await self._get_tshark_process() 140 | for binary_packet, sniff_time in itertools.zip_longest(binary_packets, sniff_times): 141 | self._write_packet(binary_packet, sniff_time) 142 | 143 | def callback(pkt): 144 | parsed_packets.append(pkt) 145 | if len(parsed_packets) == len(binary_packets): 146 | raise StopCapture() 147 | 148 | await self._get_parsed_packet_from_tshark(callback, timeout) 149 | return parsed_packets 150 | 151 | async def _get_parsed_packet_from_tshark(self, callback, timeout): 152 | await self._current_tshark.stdin.drain() 153 | try: 154 | await asyncio.wait_for(self.packets_from_tshark(callback, close_tshark=False), timeout) 155 | except asyncio.TimeoutError: 156 | await self.close_async() 157 | raise asyncio.TimeoutError("Timed out while waiting for tshark to parse packet. " 158 | "Try rerunning with cap.set_debug() to see tshark errors. " 159 | "Closing tshark..") 160 | 161 | async def close_async(self): 162 | self._current_tshark = None 163 | await super(InMemCapture, self).close_async() 164 | 165 | def feed_packet(self, binary_packet, linktype=LinkTypes.ETHERNET, timeout=DEFAULT_TIMEOUT): 166 | """ 167 | DEPRECATED. Use parse_packet instead. 168 | This function adds the packet to the packets list, and also closes and reopens tshark for 169 | each packet. 170 | ============== 171 | 172 | Gets a binary (string) packet and parses & adds it to this capture. 173 | Returns the added packet. 174 | 175 | Use feed_packets if you have multiple packets to insert. 176 | 177 | By default, assumes the packet is an ethernet packet. For another link type, supply the linktype argument (most 178 | can be found in the class LinkTypes) 179 | """ 180 | warnings.warn( 181 | "Deprecated method. 
Use InMemCapture.parse_packet() instead.") 182 | self._current_linktype = linktype 183 | pkt = self.parse_packet(binary_packet, timeout=timeout) 184 | self.close() 185 | self._packets.append(pkt) 186 | return pkt 187 | 188 | def feed_packets(self, binary_packets, linktype=LinkTypes.ETHERNET, timeout=DEFAULT_TIMEOUT): 189 | """Gets a list of binary packets, parses them using tshark and returns their parsed values. 190 | 191 | Keeps the packets in the internal packet list as well. 192 | 193 | By default, assumes the packets are ethernet packets. For another link type, supply the linktype argument (most 194 | can be found in the class LinkTypes) 195 | """ 196 | self._current_linktype = linktype 197 | parsed_packets = self.parse_packets(binary_packets, timeout=timeout) 198 | self._packets.extend(parsed_packets) 199 | self.close() 200 | return parsed_packets 201 | -------------------------------------------------------------------------------- /src/pyshark/capture/live_capture.py: -------------------------------------------------------------------------------- 1 | import os 2 | import asyncio 3 | import subprocess 4 | 5 | from packaging import version 6 | 7 | from pyshark.capture.capture import Capture 8 | from pyshark.tshark import tshark 9 | from pyshark.tshark.tshark import get_tshark_interfaces, get_process_path 10 | 11 | 12 | class UnknownInterfaceException(Exception): 13 | pass 14 | 15 | 16 | class LiveCapture(Capture): 17 | """Represents a live capture on a network interface.""" 18 | 19 | def __init__(self, interface=None, bpf_filter=None, display_filter=None, only_summaries=False, 20 | decryption_key=None, encryption_type='wpa-pwk', output_file=None, decode_as=None, 21 | disable_protocol=None, tshark_path=None, override_prefs=None, capture_filter=None, 22 | monitor_mode=False, use_json=False, use_ek=False, 23 | include_raw=False, eventloop=None, custom_parameters=None, 24 | debug=False): 25 | """Creates a new live capturer on a given interface. 
Does not start the actual capture itself. 26 | 27 | :param interface: Name of the interface to sniff on or a list of names (str). If not given, runs on all interfaces. 28 | :param bpf_filter: BPF filter to use on packets. 29 | :param display_filter: Display (wireshark) filter to use. 30 | :param only_summaries: Only produce packet summaries, much faster but includes very little information. 31 | :param decryption_key: Optional key used to encrypt and decrypt captured traffic. 32 | :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', or 33 | 'WPA-PWK'. Defaults to WPA-PWK). 34 | :param output_file: Additionally save live captured packets to this file. 35 | :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark 36 | to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make 37 | it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details. 38 | :param tshark_path: Path of the tshark binary 39 | :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}. 40 | :param capture_filter: Capture (wireshark) filter to use. 41 | :param disable_protocol: Tells tshark to remove a dissector for a specific protocol. 42 | :param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data. 43 | :param use_json: DEPRECATED. Use use_ek instead. 44 | :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"} or 45 | else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
46 | """ 47 | super(LiveCapture, self).__init__(display_filter=display_filter, only_summaries=only_summaries, 48 | decryption_key=decryption_key, encryption_type=encryption_type, 49 | output_file=output_file, decode_as=decode_as, disable_protocol=disable_protocol, 50 | tshark_path=tshark_path, override_prefs=override_prefs, 51 | capture_filter=capture_filter, use_json=use_json, use_ek=use_ek, 52 | include_raw=include_raw, 53 | eventloop=eventloop, custom_parameters=custom_parameters, 54 | debug=debug) 55 | self.bpf_filter = bpf_filter 56 | self.monitor_mode = monitor_mode 57 | 58 | all_interfaces = get_tshark_interfaces(tshark_path) 59 | if interface is None: 60 | self.interfaces = all_interfaces 61 | elif isinstance(interface, str): 62 | self.interfaces = [interface] 63 | else: 64 | self.interfaces = interface 65 | 66 | def get_parameters(self, packet_count=None): 67 | """Returns the special tshark parameters to be used according to the configuration of this class.""" 68 | params = super(LiveCapture, self).get_parameters(packet_count=packet_count) 69 | # Read from STDIN 70 | params += ["-i", "-"] 71 | return params 72 | 73 | def _verify_capture_parameters(self): 74 | all_interfaces_names = tshark.get_all_tshark_interfaces_names(self.tshark_path) 75 | all_interfaces_lowercase = [interface.lower() for interface in all_interfaces_names] 76 | for each_interface in self.interfaces: 77 | if each_interface.startswith("rpcap://"): 78 | continue 79 | if each_interface.isnumeric(): 80 | continue 81 | if each_interface.lower() not in all_interfaces_lowercase: 82 | raise UnknownInterfaceException( 83 | f"Interface '{each_interface}' does not exist, unable to initiate capture. " 84 | f"Perhaps permissions are missing?\n" 85 | f"Possible interfaces: {os.linesep.join(all_interfaces_names)}") 86 | 87 | def _get_dumpcap_parameters(self): 88 | # Don't report packet counts. 
89 | params = ["-q"] 90 | if self._get_tshark_version() < version.parse("2.5.0"): 91 | # Tshark versions older than 2.5 don't support pcapng. This flag forces dumpcap to output pcap. 92 | params += ["-P"] 93 | if self.bpf_filter: 94 | params += ["-f", self.bpf_filter] 95 | if self.monitor_mode: 96 | params += ["-I"] 97 | for interface in self.interfaces: 98 | params += ["-i", interface] 99 | # Write to STDOUT 100 | params += ["-w", "-"] 101 | return params 102 | 103 | async def _get_tshark_process(self, packet_count=None, stdin=None): 104 | read, write = os.pipe() 105 | 106 | dumpcap_params = [get_process_path(process_name="dumpcap", tshark_path=self.tshark_path)] + self._get_dumpcap_parameters() 107 | 108 | self._log.debug("Creating Dumpcap subprocess with parameters: %s", " ".join(dumpcap_params)) 109 | dumpcap_process = await asyncio.create_subprocess_exec(*dumpcap_params, stdout=write, 110 | stderr=subprocess.PIPE) 111 | self._create_stderr_handling_task(dumpcap_process.stderr) 112 | self._created_new_process(dumpcap_params, dumpcap_process, process_name="Dumpcap") 113 | 114 | tshark = await super(LiveCapture, self)._get_tshark_process(packet_count=packet_count, stdin=read) 115 | return tshark 116 | 117 | # Backwards compatibility 118 | sniff = Capture.load_packets 119 | 120 | def sniff_continuously(self, packet_count=None): 121 | """Captures from the set interface, returning a generator which returns packets continuously. 122 | 123 | Can be used as follows: 124 | for packet in capture.sniff_continuously(): 125 | print('Woo, another packet:', packet) 126 | 127 | Note: you can also call capture.apply_on_packets(packet_callback) which should have a slight performance boost. 128 | 129 | :param packet_count: an amount of packets to capture, then stop. 130 | """ 131 | # Retained for backwards compatibility and to add documentation. 
132 | return self._packets_from_tshark_sync(packet_count=packet_count) 133 | -------------------------------------------------------------------------------- /src/pyshark/capture/live_ring_capture.py: -------------------------------------------------------------------------------- 1 | from pyshark import LiveCapture 2 | 3 | 4 | class LiveRingCapture(LiveCapture): 5 | """Represents a live ringbuffer capture on a network interface.""" 6 | 7 | def __init__(self, ring_file_size=1024, num_ring_files=1, ring_file_name='/tmp/pyshark.pcap', interface=None, 8 | bpf_filter=None, display_filter=None, only_summaries=False, decryption_key=None, 9 | encryption_type='wpa-pwk', decode_as=None, disable_protocol=None, 10 | tshark_path=None, override_prefs=None, capture_filter=None, 11 | use_json=False, use_ek=False, include_raw=False, eventloop=None, 12 | custom_parameters=None, debug=False): 13 | """ 14 | Creates a new live capturer on a given interface. Does not start the actual capture itself. 15 | :param ring_file_size: Size of the ring file in kB, default is 1024 16 | :param num_ring_files: Number of ring files to keep, default is 1 17 | :param ring_file_name: Name of the ring file, default is /tmp/pyshark.pcap 18 | :param interface: Name of the interface to sniff on or a list of names (str). If not given, runs on all interfaces. 19 | :param bpf_filter: BPF filter to use on packets. 20 | :param display_filter: Display (wireshark) filter to use. 21 | :param only_summaries: Only produce packet summaries, much faster but includes very little information 22 | :param decryption_key: Optional key used to encrypt and decrypt captured traffic. 23 | :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', or 24 | 'WPA-PWK'. Defaults to WPA-PWK). 
25 | :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark 26 | to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make 27 | it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details. 28 | :param tshark_path: Path of the tshark binary 29 | :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}. 30 | :param capture_filter: Capture (wireshark) filter to use. 31 | :param disable_protocol: Tells tshark to remove a dissector for a specific protocol. 32 | :param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data. 33 | :param use_json: DEPRECATED. Use use_ek instead. 34 | :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"} 35 | or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
class PipeCapture(Capture):
    """A capture which hands an already-open pipe to tshark as its standard input."""

    def __init__(self, pipe, display_filter=None, only_summaries=False,
                 decryption_key=None, encryption_type='wpa-pwk', decode_as=None,
                 disable_protocol=None, tshark_path=None, override_prefs=None, use_json=False,
                 use_ek=False, include_raw=False, eventloop=None, custom_parameters=None, debug=False):
        """Receives a file-like and reads the packets from there (pcap format).

        :param pipe: The pipe to read packets from. It is passed as ``stdin`` to the tshark process and
        closed by close(). Either a raw file descriptor or an object with a ``close()`` method works.
        :param display_filter: Display (wireshark) filter to use.
        :param only_summaries: Only produce packet summaries, much faster but includes very little information
        :param decryption_key: Key used to encrypt and decrypt captured traffic.
        :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD',
        or 'WPA-PWK'. Defaults to WPA-PWK).
        :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark
        to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make
        it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details.
        :param disable_protocol: Tells tshark to remove a dissector for a specific protocol.
        :param tshark_path: Path of the tshark binary
        :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}.
        :param use_json: DEPRECATED. Use use_ek instead.
        :param use_ek: Uses tshark in EK JSON mode. It is faster than XML but has slightly less data.
        :param include_raw: Forwarded unchanged to Capture.__init__.
        :param eventloop: Forwarded unchanged to Capture.__init__.
        :param custom_parameters: A dict of custom parameters to pass to tshark, i.e. {"--param": "value"}
        or else a list of parameters in the format ["--foo", "bar", "--baz", "foo"].
        :param debug: Forwarded unchanged to Capture.__init__.
        """
        super().__init__(display_filter=display_filter,
                         only_summaries=only_summaries,
                         decryption_key=decryption_key,
                         encryption_type=encryption_type,
                         decode_as=decode_as, disable_protocol=disable_protocol,
                         tshark_path=tshark_path, override_prefs=override_prefs,
                         use_json=use_json, use_ek=use_ek, include_raw=include_raw, eventloop=eventloop,
                         custom_parameters=custom_parameters, debug=debug)
        self._pipe = pipe

    def get_parameters(self, packet_count=None):
        """
        Returns the special tshark parameters to be used according to the configuration of this class.
        """
        params = super().get_parameters(packet_count=packet_count)
        # "-r -" is appended so tshark reads the capture from its standard input (the pipe).
        params += ['-r', '-']
        return params

    async def _get_tshark_process(self, packet_count=None):
        """Starts the tshark process through the parent class, with the pipe given as its stdin."""
        return await super()._get_tshark_process(packet_count=packet_count, stdin=self._pipe)

    def close(self):
        """Closes the pipe and then the capture itself."""
        # os.close() only accepts integer descriptors; a file-like object has to be
        # closed through its own close() method, otherwise a TypeError is raised.
        close_pipe = getattr(self._pipe, 'close', None)
        if callable(close_pipe):
            close_pipe()
        else:
            os.close(self._pipe)
        super().close()

    def sniff_continuously(self, packet_count=None):
        """
        Reads from the pipe, returning a generator which returns packets continuously.

        Can be used as follows:
        for packet in capture.sniff_continuously():
            print('Woo, another packet:', packet)

        Note: you can also call capture.apply_on_packets(packet_callback) which should have a slight performance boost.

        :param packet_count: an amount of packets to capture, then stop.
        """
        # Retained for backwards compatibility and to add documentation.
        return self._packets_from_tshark_sync(packet_count=packet_count)
28 | Note: The remote machine should have rpcapd running in null authentication mode (-n). Be warned that the traffic 29 | is unencrypted! 30 | 31 | Note: 32 | *args and **kwargs are passed to LiveCature's __init__ method. 33 | 34 | 35 | :param remote_host: The remote host to capture on (IP or hostname). Should be running rpcapd. 36 | :param remote_interface: The remote interface on the remote machine to capture on. Note that on windows it is 37 | not the device display name but the true interface name (i.e. \\Device\\NPF_..). 38 | :param remote_port: The remote port the rpcapd service is listening on 39 | :param bpf_filter: A BPF (tcpdump) filter to apply on the cap before reading. 40 | :param only_summaries: Only produce packet summaries, much faster but includes very little information 41 | :param decryption_key: Key used to encrypt and decrypt captured traffic. 42 | :param encryption_type: Standard of encryption used in captured traffic (must be either 'WEP', 'WPA-PWD', 43 | or 'WPA-PWK'. Defaults to WPA-PWK). 44 | :param decode_as: A dictionary of {decode_criterion_string: decode_as_protocol} that are used to tell tshark 45 | to decode protocols in situations it wouldn't usually, for instance {'tcp.port==8888': 'http'} would make 46 | it attempt to decode any port 8888 traffic as HTTP. See tshark documentation for details. 47 | :param tshark_path: Path of the tshark binary 48 | :param override_prefs: A dictionary of tshark preferences to override, {PREFERENCE_NAME: PREFERENCE_VALUE, ...}. 49 | :param disable_protocol: Tells tshark to remove a dissector for a specifc protocol. 
def get_config():
    """Return a ConfigParser loaded from the first config.ini that exists.

    The config.ini in the current working directory takes precedence over the one
    shipped next to the pyshark package. Returns None when neither file exists.
    """
    for candidate in (fp_config_path, pyshark_config_path):
        if candidate.exists():
            parser = ConfigParser()
            parser.read(candidate)
            return parser
    return None
_MAPPING_CACHE_NAME = "ek_field_mapping.json"


class FieldNotFound(Exception):
    """Raised when no type mapping exists for the requested protocol."""


class ProtocolMappingNotInitialized(Exception):
    """Raised when the mapping is queried before load_mapping() populated it."""


class _EkFieldMapping:
    """Maps EK (tshark JSON) field names to Python types and casts raw values accordingly."""

    def __init__(self):
        # protocol name -> mapping dict; each mapping has a "properties" dict
        # of {field_name: {"type": <ek type name>}} (see get_field_type).
        self._protocol_to_mapping = {}

    def load_mapping(self, tshark_version, tshark_path=None):
        """Loads the field mapping, from the on-disk cache if present, else from tshark.

        Does nothing if a mapping is already loaded. When the mapping is fetched from
        tshark it is written to the cache file for the given tshark version.

        :param tshark_version: Passed to cache.get_cache_dir() to select the cache directory.
        :param tshark_path: Path of the tshark binary, forwarded to tshark.get_ek_field_mapping().
        """
        if self._protocol_to_mapping:
            return

        mapping_cache_file = cache.get_cache_dir(tshark_version).joinpath(_MAPPING_CACHE_NAME)
        if mapping_cache_file.exists():
            # Context managers make sure the handles are closed promptly.
            with mapping_cache_file.open() as cache_fp:
                self._protocol_to_mapping = json.load(cache_fp)
        else:
            self._protocol_to_mapping = tshark.get_ek_field_mapping(tshark_path=tshark_path)
            with mapping_cache_file.open("w") as cache_fp:
                cache_fp.write(json.dumps(self._protocol_to_mapping))

    def cast_field_value(self, protocol, field_name, field_value):
        """Casts the field value to its proper type according to the mapping

        Lists are cast element by element and non-string values are returned untouched.
        Whenever the value cannot be converted to the mapped type, it is returned as-is.

        :raises ProtocolMappingNotInitialized: If no mapping was loaded.
        :raises FieldNotFound: If the protocol has no mapping.
        """
        if isinstance(field_value, list):
            return [self.cast_field_value(protocol, field_name, item) for item in field_value]
        if not isinstance(field_value, str):
            return field_value
        field_type = self.get_field_type(protocol, field_name)
        if field_type is str:
            return field_value
        if field_type is bytes:
            try:
                return binascii.unhexlify(field_value.replace(":", ""))
            except binascii.Error:
                return field_value

        try:
            # Integers may be given in "0x.." form; a malformed hex string falls back
            # to the raw value just like any other failed conversion.
            if field_type is int and field_value.startswith("0x"):
                return int(field_value, 16)
            return field_type(field_value)
        except ValueError:
            return field_value

    def get_field_type(self, protocol, field_name):
        """Gets the Python type for the given field (only for EK fields).

        If we are unfamiliar with the type, str will be returned.

        :raises ProtocolMappingNotInitialized: If no mapping was loaded.
        :raises FieldNotFound: If the protocol has no mapping.
        """
        if not self._protocol_to_mapping:
            raise ProtocolMappingNotInitialized("Protocol mapping not initialized. Call load_mapping() first")
        if protocol not in self._protocol_to_mapping:
            raise FieldNotFound(f"Type mapping for protocol {protocol} not found")

        fields = self._protocol_to_mapping[protocol]["properties"]
        if field_name not in fields:
            return str
        return self._get_python_type_for_field_type(fields[field_name]["type"])

    def clear(self):
        """Drops the loaded mapping; load_mapping() must be called again before use."""
        self._protocol_to_mapping.clear()

    @classmethod
    def _get_python_type_for_field_type(cls, field_type):
        """Translates an EK type name into the Python type used for casting."""
        if field_type in ("integer", "long", "short"):
            return int
        if field_type == "float":
            return float
        if field_type == "date":
            # We don't use datetime.datetime because these can be timedeltas as well.
            # Better let the user decide.
            return float
        if field_type == "byte":
            return bytes
        # Other known types are IP. Retain as str
        return str


MAPPING = _EkFieldMapping()
class LayerField(SlotsPickleable):
    """Holds all data about a field of a layer, both its actual value and its name and nice representation."""

    # Note: We use this object with slots and not just a dict because
    # it's much more memory-efficient (cuts about a third of the memory).
    __slots__ = ['name', 'showname', 'raw_value', 'show', 'hide', 'pos', 'size', 'unmaskedvalue']

    def __init__(self, name=None, showname=None, value=None, show=None, hide=None, pos=None, size=None, unmaskedvalue=None):
        """Stores the given attributes; `value` is kept as `raw_value`.

        `hide` is normalised to a bool: True only when the string 'yes' was given.
        """
        self.name = name
        self.showname = showname
        self.raw_value = value
        self.show = show
        self.pos = pos
        self.size = size
        self.unmaskedvalue = unmaskedvalue
        self.hide = hide == 'yes'

    def __repr__(self):
        # The repr previously evaluated to an empty string, which made fields invisible
        # in debuggers and container reprs.
        return f'<LayerField {self.name}: {self.get_default_value()}>'

    def get_default_value(self) -> str:
        """Gets the best 'value' string this field has.

        Falls back from `show` to `raw_value` to `showname`, taking the first non-empty one.
        """
        val = self.show
        if not val:
            val = self.raw_value
        if not val:
            val = self.showname
        return val

    @property
    def showname_value(self) -> typing.Union[str, None]:
        """The "pretty value" (as displayed by Wireshark) of the field.

        This is the part of `showname` after the first ': ', or None if there is none.
        """
        if self.showname and ': ' in self.showname:
            return self.showname.split(': ', 1)[1]
        return None

    @property
    def showname_key(self) -> typing.Union[str, None]:
        """The "pretty name" (as displayed by Wireshark) of the field.

        This is the part of `showname` before the first ': ', or None if there is none.
        """
        if self.showname and ': ' in self.showname:
            return self.showname.split(': ', 1)[0]
        return None

    @property
    def binary_value(self) -> bytes:
        """Converts this field to binary (assuming it's a binary string)"""
        str_raw_value = str(self.raw_value)
        # unhexlify needs an even number of hex digits, so pad with a leading zero.
        if len(str_raw_value) % 2 == 1:
            str_raw_value = '0' + str_raw_value

        return binascii.unhexlify(str_raw_value)

    @property
    def int_value(self) -> int:
        """Returns the int value of this field (assuming it's represented as a decimal integer)."""
        return int(self.raw_value)

    @property
    def hex_value(self) -> int:
        """Returns the int value of this field if it's in base 16

        (either as a normal number or in a "0xFFFF"-style hex value)
        """
        return int(self.raw_value, 16)

    base16_value = hex_value


class LayerFieldsContainer(str, Pickleable):
    """An object which contains one or more fields (of the same name).

    When accessing member, such as showname, raw_value, etc. the appropriate member of the main (first) field saved
    in this container will be shown.
    """

    def __new__(cls, main_field, *args, **kwargs):
        # The string content of the container is the main field's default value when the
        # field provides one, otherwise the given object itself.
        if hasattr(main_field, 'get_default_value'):
            obj = str.__new__(cls, main_field.get_default_value(), *args, **kwargs)
        else:
            obj = str.__new__(cls, main_field, *args, **kwargs)
        obj.fields = [main_field]
        return obj

    def __dir__(self):
        return dir(type(self)) + list(self.__dict__.keys()) + dir(self.main_field)

    def add_field(self, field) -> None:
        """Appends an alternate field (same name) to this container."""
        self.fields.append(field)

    @property
    def all_fields(self) -> list:
        """Returns all fields in a list, the main field followed by the alternate fields."""
        return self.fields

    @property
    def main_field(self) -> LayerField:
        """The first field stored in the container."""
        return self.fields[0]

    @property
    def alternate_fields(self) -> list:
        """Return the alternate values of this field containers (non-main ones)."""
        return self.fields[1:]

    def __getattr__(self, item):
        # Only called for attributes not found normally: delegate to the main field.
        return getattr(self.main_field, item)
class BaseLayer(common.SlotsPickleable):
    """An object representing a Packet layer."""
    __slots__ = ["_layer_name"]

    def __init__(self, layer_name):
        self._layer_name = layer_name

    def get_field(self, name):
        """Returns the field with the given name; must be implemented by subclasses.

        __getattr__ treats a None result as "field does not exist".
        """
        raise NotImplementedError()

    @property
    def field_names(self) -> typing.List[str]:
        """Gets all XML field names of this layer."""
        raise NotImplementedError()

    def has_field(self, name):
        """Checks whether the given name is one of this layer's field names."""
        return name in self.field_names

    @property
    def layer_name(self):
        return self._layer_name

    def get(self, item, default=None):
        """Gets a field in the layer, or the default if not found.

        Works the same way as getattr, but returns the given default if the field was not found"""
        try:
            return getattr(self, item)
        except AttributeError:
            return default

    def __dir__(self):
        return dir(type(self)) + self.field_names

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails: resolve the name as a field.
        val = self.get_field(item)
        if val is None:
            raise AttributeError(f"{item} does not exist in Layer")
        return val

    def pretty_print(self, writer=None):
        """Writes a human-readable dump of the layer to the writer (sys.stdout by default)."""
        if not writer:
            writer = sys.stdout
        if self.layer_name == DATA_LAYER_NAME:
            writer.write('DATA')
            return

        # The colon belongs to the header line, so it goes before the line separator
        # (previously the output was "Layer ETH" followed by a line starting with ":").
        text = f'Layer {self.layer_name.upper()}:{os.linesep}'
        writer.write(common.colored(text, color="yellow", attrs=["bold"]))
        self._pretty_print_layer_fields(writer)

    def _pretty_print_layer_fields(self, terminal_writer: io.IOBase):
        """Writes the layer's fields to the given writer; must be implemented by subclasses."""
        raise NotImplementedError()

    def __repr__(self):
        return f'<{self.layer_name.upper()} Layer>'

    def __str__(self):
        # The string form is the pretty-printed layer, captured in memory.
        writer = io.StringIO()
        self.pretty_print(writer=writer)
        return writer.getvalue()
class _EkLayerHelperFuncsMixin(abc.ABC):
    """For methods shared between the EK layer and sublayers"""

    def get_field_as_list(self, name) -> list:
        """Helper function to get a certain field always as a list.

        Some fields may appear once or more in the packet. The field will appear as a list if it appears more
        than once. In order to avoid checking certain fields if they're lists or not, this function will
        return the field inside a list at all times.

        For example, in a DNS packet there may be one or more responses.
        A packet with one response (www.google.com) will return:
            >>> print(pkt.dns.resp_name)
            "www.google.com"
        While a packet with two responses will return:
            >>> print(pkt.dns.resp_name)
            ["www.google.com", "www.google2.com"]

        To avoid this changing behaviour, use:
            >>> print(pkt.dns.get_field_as_list("resp_name"))
            ["www.google.com"]
        """
        field_value = self.get_field(name)
        if isinstance(field_value, list):
            return field_value
        return [field_value]


class EkLayer(BaseLayer, _EkLayerHelperFuncsMixin):
    """A layer backed by a flat dict of EK field names ("<layer>_<layer>_<field>...") to values."""
    __slots__ = ["_layer_name", "_fields_dict"]

    def __init__(self, layer_name, layer_dict):
        super().__init__(layer_name)
        self._fields_dict = layer_dict

    def get_field(self, name) -> typing.Union["EkMultiField", None, str, int, bool, bytes, list]:
        """Gets a field by name (dots are treated as underscores), or None if it does not exist."""
        name = name.replace(".", "_")
        if name in self._fields_dict:
            # For cases like "text"
            return self._get_field_value(name)

        for prefix in self._get_possible_layer_prefixes():
            nested_field = self._get_nested_field(prefix, name)
            if nested_field is not None:
                return nested_field

        return None

    def has_field(self, name) -> bool:
        """Checks if the field exists, either a nested field or a regular field"""
        return name in self.field_names or name in self.all_field_names

    @property
    def field_names(self):
        """The top-level field names: the part of each full field name before the first underscore."""
        return list({field_name.split("_", 1)[0] for field_name in self.all_field_names})

    @property
    def all_field_names(self):
        """Gets all field names, including subfields"""
        names = set()
        for field_name in self._fields_dict:
            for prefix in self._get_possible_layer_prefixes():
                if field_name.startswith(prefix):
                    names.add(_remove_ek_prefix(prefix, field_name))
                    # Prefixes are ordered longest first, so stop at the first match.
                    break
        return list(names)

    def _get_field_value(self, full_field_name):
        """Gets the field value, optionally casting it using the cached field mapping"""
        field_value = self._fields_dict[full_field_name]
        return ek_field_mapping.MAPPING.cast_field_value(self._layer_name, full_field_name, field_value)

    def _get_nested_field(self, prefix, name):
        """Gets a field that is directly on the layer

        Returns either a multifield or a raw value, or None if nothing matches.
        """
        # TODO: Optimize
        field_ek_name = f"{prefix}_{name}"
        if field_ek_name in self._fields_dict:
            if self._field_has_subfields(field_ek_name):
                return EkMultiField(self, self._fields_dict, name,
                                    value=self._get_field_value(field_ek_name))
            return self._get_field_value(field_ek_name)

        # No value of its own, but it may still be the parent of other fields.
        for possible_nested_name in self._fields_dict:
            if possible_nested_name.startswith(f"{field_ek_name}_"):
                return EkMultiField(self, self._fields_dict, name, value=None)

        return None

    def _field_has_subfields(self, field_ek_name):
        """Checks whether any stored field name starts with "<field_ek_name>_"."""
        field_ek_name_with_ext = f"{field_ek_name}_"
        for field_name in self._fields_dict:
            if field_name.startswith(field_ek_name_with_ext):
                return True
        return False

    def _pretty_print_layer_fields(self, file: io.IOBase):
        for field_name in self.field_names:
            field = self.get_field(field_name)
            self._pretty_print_field(field_name, field, file, indent=1)

    def _pretty_print_field(self, field_name, field, file, indent=0):
        """Writes one field (recursing into the subfields of multi-fields), indented with tabs."""
        prefix = "\t" * indent
        if isinstance(field, EkMultiField):
            file.write(colored(f"{prefix}{field_name}: ", "green", attrs=["bold"]))
            if field.value is not None:
                file.write(str(field.value))
            file.write(os.linesep)
            for subfield in field.subfields:
                self._pretty_print_field(subfield, field.get_field(subfield), file,
                                         indent=indent + 1)
        else:
            file.write(colored(f"{prefix}{field_name}: ", "green", attrs=["bold"]))
            file.write(f"{field}{os.linesep}")

    def _get_possible_layer_prefixes(self):
        """Gets the possible prefixes for a field under this layer.

        The order matters, longest must be first
        """
        return [f"{self._layer_name}_{self._layer_name}", self._layer_name]


class EkMultiField(_EkLayerHelperFuncsMixin):
    """A field that has subfields, and optionally a value of its own."""
    __slots__ = ["_containing_layer", "_full_name", "_all_fields", "value"]

    def __init__(self, containing_layer: EkLayer, all_fields, full_name, value=None):
        self._containing_layer = containing_layer
        self._full_name = full_name
        self._all_fields = all_fields
        self.value = value

    def get_field(self, field_name):
        """Gets a subfield by looking up "<full name>_<field_name>" on the containing layer."""
        return self._containing_layer.get_field(f"{self._full_name}_{field_name}")

    @property
    def subfields(self):
        """Names of the direct subfields of this field."""
        names = set()
        for field_name in self._containing_layer.all_field_names:
            if field_name != self._full_name and field_name.startswith(f"{self._full_name}_"):
                # The remainder starts with "_", so index 1 is the first name part after it.
                names.add(field_name[len(self._full_name):].split("_")[1])
        return list(names)

    @property
    def field_name(self):
        """The last underscore-separated part of the full name."""
        return self._full_name.split("_")[-1]

    def __getattr__(self, item):
        value = self.get_field(item)
        if value is None:
            raise AttributeError(f"Subfield {item} not found")
        return value

    def __repr__(self):
        # Previously the computed value text was discarded and an empty string returned.
        # `is not None` mirrors the check used when pretty-printing, so falsy values
        # such as 0 are still shown.
        value = f": {self.value}" if self.value is not None else ""
        return f"<EkMultiField {self.field_name}{value}>"

    def __dir__(self) -> typing.Iterable[str]:
        return dir(type(self)) + self.subfields


def _remove_ek_prefix(prefix, value):
    """Removes prefix given and the underscore after it"""
    return value[len(prefix) + 1:]


def _get_subfields(all_fields, field_ek_name):
    """Returns the names in all_fields that start with field_ek_name, with that prefix removed."""
    subfield_names = []
    for field in all_fields:
        if field != field_ek_name and field.startswith(field_ek_name):
            subfield_names.append(_remove_ek_prefix(field_ek_name, field))
    return subfield_names
class JsonLayer(BaseLayer):
    """A layer backed by a tshark JSON dictionary; fields and sublayers are wrapped lazily on access."""
    __slots__ = [
        "duplicate_layers",
        "_showname_fields_converted_to_regular",
        "_full_name",
        "_is_intermediate",
        "_wrapped_fields",
        "value",
        "_all_fields"
    ] + BaseLayer.__slots__

    def __init__(self, layer_name, layer_dict, full_name=None, is_intermediate=False):
        """Creates a JsonLayer. All sublayers and fields are created lazily later.

        :param layer_name: Name of the layer.
        :param layer_dict: The raw JSON content: a dict of fields, a list of such contents
        (the first entry becomes this layer, the rest go to duplicate_layers), or a plain value.
        :param full_name: Full dotted name of the layer; defaults to the layer name.
        :param is_intermediate: Stored on the layer; set for "fake" layers by _make_wrapped_field.
        """
        super().__init__(layer_name)
        self.duplicate_layers = []
        self._showname_fields_converted_to_regular = False
        if not full_name:
            self._full_name = self._layer_name
        else:
            self._full_name = full_name
        self._is_intermediate = is_intermediate
        # Cache of already wrapped fields, keyed by the name they were requested with.
        self._wrapped_fields = {}
        if isinstance(layer_dict, list):
            self.duplicate_layers = [JsonLayer(layer_name, duplicate_dict,
                                               full_name=full_name, is_intermediate=is_intermediate)
                                     for duplicate_dict in layer_dict[1:]]
            layer_dict = layer_dict[0]
        if not isinstance(layer_dict, dict):
            # A plain value rather than a dict of fields: keep it as the layer's value.
            self.value = layer_dict
            self._all_fields = {}
            return

        self._all_fields = layer_dict

    def get_field(self, name):
        """Gets a field by its full or partial name.

        :raises AttributeError: If no field (real or "fake") matches the name.
        """
        # We only make the wrappers here (lazily) to avoid creating a ton of objects needlessly.
        self._convert_showname_field_names_to_field_names()
        field = self._wrapped_fields.get(name)
        if field is None:
            is_fake = False
            field = self._get_internal_field_by_name(name)
            if field is None:
                # Might be a "fake" field in JSON
                is_fake = self._is_fake_field(name)
                if not is_fake:
                    raise AttributeError(f"No such field {name}")
            field = self._make_wrapped_field(name, field, is_fake=is_fake)
            self._wrapped_fields[name] = field
        return field

    @property
    def field_names(self):
        """Field names with the layer's full name stripped, plus the last dotted part of every dotted name."""
        self._convert_showname_field_names_to_field_names()
        return list(set([self._sanitize_field_name(name) for name in self._all_fields
                         if name.startswith(self._full_name)] +
                        [name.rsplit('.', 1)[1] for name in self._all_fields if '.' in name]))

    def has_field(self, dotted_name) -> bool:
        """Checks whether the layer has the given field name.

        Can get a dotted name, i.e. layer.sublayer.subsublayer.field
        """
        parts = dotted_name.split('.')
        cur_layer = self
        # Walk down one level per dotted part.
        for part in parts:
            if part in cur_layer.field_names:
                cur_layer = cur_layer.get_field(part)
            else:
                return False
        return True

    def _pretty_print_layer_fields(self, file: io.IOBase):
        """Writes every field line, colouring the part up to the first ':' as the field name."""
        for field_line in self._get_all_field_lines():
            if ':' in field_line:
                field_name, field_line = field_line.split(':', 1)
                file.write(colored(field_name + ':', "green", attrs=["bold"]))
            file.write(colored(field_line, attrs=["bold"]))

    def _get_all_field_lines(self):
        """Returns all lines that represent the fields of the layer (both their names and values)."""
        for field in self._get_all_fields_with_alternates():
            yield from self._get_field_or_layer_repr(field)

    def _get_field_or_layer_repr(self, field):
        """Yields the printable lines of a field, a nested layer or a list of either."""
        if isinstance(field, JsonLayer):
            yield "\t" + field.layer_name + ":" + os.linesep
            # Lines of a nested layer get one extra level of indentation.
            for line in field._get_all_field_lines():
                yield "\t" + line
        elif isinstance(field, list):
            for subfield_or_layer in field:
                yield from self._get_field_or_layer_repr(subfield_or_layer)
        else:
            yield f"\t{self._sanitize_field_name(field.name)}: {field.raw_value}{os.linesep}"

    def _sanitize_field_name(self, field_name):
        """Removes the "<full layer name>." part from the field name."""
        return field_name.replace(self._full_name + '.', '')

    def _field_name_from_showname(self, field_name):
        """Converts a 'showname'-like field key to a regular field name

        Sometimes in the JSON, there are "text" type fields which might look like this:
        "my_layer":
            {
                "my_layer.some_field": 1,
                "Something Special: it's special": {
                    "my_layer.special_field": "it's special"
                }
            }

        We convert the showname key into the field name. The internals will turn into a fake layer.
        In this case the field will be accessible by pkt.my_layer.something_special.special_field
        """
        showname_key = field_name.split(":", 1)[0]
        return self._full_name + "." + showname_key.lower().replace(" ", "_")

    def _get_all_fields_with_alternates(self):
        """Returns the wrapped field for every name in field_names."""
        return [self.get_field(name) for name in self.field_names]

    def _convert_showname_field_names_to_field_names(self):
        """Converts all fields that don't have a proper name (they have a showname name) to a regular name

        See self._field_name_from_showname docs for more.
        """
        # The conversion mutates _all_fields, so it is only done once per layer.
        if self._showname_fields_converted_to_regular:
            return
        # Iterate over a copy of the keys since entries are popped and re-added.
        for field_name in list(self._all_fields):
            if ":" in field_name:
                field_value = self._all_fields.pop(field_name)
                if isinstance(field_value, dict):
                    # Save the showname
                    field_value["showname"] = field_name
                # Convert the old name to the new name.
                self._all_fields[
                    self._field_name_from_showname(field_name)] = field_value

        self._showname_fields_converted_to_regular = True

    def _get_internal_field_by_name(self, name):
        """Gets the field by name, or None if not found."""
        # Try the exact name first, then the name qualified with the layer's full name.
        field = self._all_fields.get(name, self._all_fields.get(f"{self._full_name}.{name}"))
        if field is not None:
            return field
        for field_name in self._all_fields:
            # Specific name
            if field_name.endswith(f'.{name}'):
                return self._all_fields[field_name]

    def _is_fake_field(self, name):
        """Checks whether any stored field name starts with "<full name>.<name>."."""
        # Some fields include parts that are not reflected in the JSON dictionary
        # i.e. a possible json is:
        # {
        #   foo: {
        #       foo.bar.baz: {
        #           foo.baz: 3
        #       }
        # }
        # So in this case we must create a fake layer for "bar".
        field_full_name = f"{self._full_name}.{name}."
        for name, field in self._all_fields.items():
            if name.startswith(field_full_name):
                return True
        return False

    def _make_wrapped_field(self, name, field, is_fake=False, full_name=None):
        """Creates the field lazily.

        If it's a simple field, wraps it in a container that adds extra features.
        If it's a nested layer, creates a layer for it.
        If it's an intermediate layer, copies over the relevant fields and creates a new layer for
        it.
        """
        if not full_name:
            full_name = f"{self._full_name}.{name}"

        if is_fake:
            # Populate with all fields that are supposed to be inside of it
            field = {key: value for key, value in self._all_fields.items()
                     if key.startswith(full_name)}
        if isinstance(field, dict):
            if name.endswith('_tree'):
                # The "_tree" marker is dropped from the layer name and its full name.
                name = name.replace('_tree', '')
                full_name = f'{self._full_name}.{name}'
            return JsonLayer(name, field, full_name=full_name, is_intermediate=is_fake)
        elif isinstance(field, list):
            # For whatever reason in list-type object it goes back to using the original parent name
            return [self._make_wrapped_field(name, field_part,
                                             full_name=self._full_name.split('.')[0])
                    for field_part in field]

        return LayerFieldsContainer(LayerField(name=name, value=field))
def get_field(self, name) -> typing.Union[LayerFieldsContainer, None]:
    """Gets the XML field object of the given name.

    The exact stored name is tried first. Failing that, the first stored field
    whose sanitized name equals the sanitized form of ``name`` is returned.

    :param name: Field name, either exactly as stored or in its sanitized form.
    :return: The stored field object, or None if nothing matches.
    """
    # Quicker in case the exact name was used.
    exact_match = self._all_fields.get(name)
    if exact_match is not None:
        return exact_match

    # The sanitized lookup key is identical for every candidate: compute it once.
    wanted = self._sanitize_field_name(name)
    for stored_name, stored_field in self._all_fields.items():
        if self._sanitize_field_name(stored_name) == wanted:
            return stored_field
    return None
@property
def field_names(self) -> typing.List[str]:
    """Sanitized names of all the fields stored on this layer, in storage order."""
    sanitize = self._sanitize_field_name
    return list(map(sanitize, self._all_fields))
def _get_all_fields_with_alternates(self):
    """Returns every stored field followed by the alternates of each container.

    :return: A new list: first the values of ``self._all_fields`` in storage
        order, then the ``alternate_fields`` of every value that is a
        LayerFieldsContainer, in the same order.
    """
    primary_fields = list(self._all_fields.values())
    # Extending one list is linear; sum(list_of_lists, []) re-copies the
    # accumulated result for every container.
    alternates = []
    for field in primary_fields:
        if isinstance(field, LayerFieldsContainer):
            alternates.extend(field.alternate_fields)
    return primary_fields + alternates
137 | """ 138 | for field in self._get_all_fields_with_alternates(): 139 | if field.showname_key == showname: 140 | # Return it if "XXX: whatever == XXX" 141 | return field 142 | return None 143 | -------------------------------------------------------------------------------- /src/pyshark/packet/packet.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import binascii 4 | import typing 5 | 6 | from pyshark.packet import consts 7 | from pyshark.packet.common import Pickleable 8 | from pyshark.packet.layers.base import BaseLayer 9 | 10 | 11 | class Packet(Pickleable): 12 | """A packet object which contains layers. 13 | 14 | Layers can be accessed via index or name. 15 | """ 16 | 17 | def __init__(self, layers=None, frame_info=None, number=None, 18 | length=None, captured_length=None, sniff_time=None, interface_captured=None): 19 | """ 20 | Creates a Packet object with the given layers and info. 21 | 22 | :param layers: A list of BaseLayer objects. 23 | :param frame_info: Layer object for the entire packet frame (information like frame length, packet number, etc. 24 | :param length: Length of the actual packet. 25 | :param captured_length: The length of the packet that was actually captured (could be less then length) 26 | :param sniff_time: The time the packet was captured (timestamp) 27 | :param interface_captured: The interface the packet was captured in. 28 | """ 29 | if layers is None: 30 | self.layers = [] 31 | else: 32 | self.layers = layers 33 | self.frame_info = frame_info 34 | self.number = number 35 | self.interface_captured = interface_captured 36 | self.captured_length = captured_length 37 | self.length = length 38 | self.sniff_timestamp = sniff_time 39 | 40 | def __getitem__(self, item): 41 | """ 42 | Gets a layer according to its index or its name 43 | 44 | :param item: layer index or name 45 | :return: BaseLayer object. 
def get_raw_packet(self) -> bytes:
    """Returns the packet's raw bytes, decoded from the hex text of its FRAME_RAW layer.

    :return: The decoded bytes. Hex digits are consumed two at a time, so a
        trailing unpaired digit is ignored.
    :raises AssertionError: If the packet has no FRAME_RAW layer.
    """
    # Raised explicitly instead of through ``assert`` so that the check is not
    # stripped when Python runs with -O. The exception type is unchanged.
    if "FRAME_RAW" not in self:
        raise AssertionError("Packet contains no raw data. In order to contains it, "
                             "make sure that use_json and include_raw are set to True "
                             "in the Capture object")
    raw_hex = self.frame_raw.value
    # Decode pair by pair and join once, rather than growing a bytes object per byte.
    return b"".join(binascii.unhexlify("".join(pair))
                    for pair in zip(raw_hex[0::2], raw_hex[1::2]))
@property
def highest_layer(self) -> str:
    """Upper-cased name of the last layer in ``self.layers``.

    :return: The ``layer_name`` of the last layer, upper-cased (a string, not
        the layer object itself).
    :raises IndexError: If ``self.layers`` is an empty list.
    """
    return self.layers[-1].layer_name.upper()
class BaseTsharkOutputParser:
    """Common packet-reading loop for the tshark output parsers.

    Subclasses supply ``_extract_packet_from_data`` (cut one packet out of a
    buffer) and ``_parse_single_packet`` (turn that piece into a packet object).
    """
    # Number of bytes requested from the stream per read (65536).
    DEFAULT_BATCH_SIZE = 1 << 16

    async def get_packets_from_stream(self, stream, existing_data, got_first_packet=True):
        """A coroutine which returns a single packet if it can be read from the given StreamReader.

        The buffered data is tried first. The stream is only read when no
        packet could be extracted from it.

        :return a tuple of (packet, remaining_data). The packet will be None if there was not
        enough data to create a packet. remaining_data is the leftover data which was not
        enough to create a packet from.
        :raises EOFError if the stream returned no new data.
        """
        raw_packet, leftover = self._extract_packet_from_data(
            existing_data, got_first_packet=got_first_packet)
        if raw_packet:
            return self._parse_single_packet(raw_packet), leftover

        chunk = await stream.read(self.DEFAULT_BATCH_SIZE)
        leftover += chunk
        if chunk:
            return None, leftover
        raise EOFError()

    def _parse_single_packet(self, packet):
        """Turns one extracted packet into a packet object. Must be overridden."""
        raise NotImplementedError()

    def _extract_packet_from_data(self, data, got_first_packet=True):
        """Returns a packet's data and any remaining data after reading that first packet"""
        raise NotImplementedError()
def packet_from_ek_packet(json_pkt):
    """Creates a Pyshark Packet from a single tshark EK (Elastic) JSON packet.

    :param json_pkt: The encoded JSON document of one packet. It is handed to
        ujson as-is when ujson is available, and decoded as UTF-8 for the
        standard json module otherwise.
    :return: A Packet whose layers follow the order of 'frame_frame_protocols',
        followed by any layers of the document not named there.
    :raises KeyError: If the document lacks 'layers', its 'frame' entry, or one
        of the frame keys read with a plain subscript below.
    """
    if USE_UJSON:
        pkt_dict = ujson.loads(json_pkt)
    else:
        pkt_dict = json.loads(json_pkt.decode('utf-8'))

    # We use the frame dict here and not the object access because it's faster.
    frame_dict = pkt_dict['layers'].pop('frame')
    layers = []
    for layer in frame_dict['frame_frame_protocols'].split(':'):
        layer_dict = pkt_dict['layers'].pop(layer, None)
        if layer_dict is not None:
            layers.append(EkLayer(layer, layer_dict))
    # Add all leftovers
    for name, layer in pkt_dict['layers'].items():
        layers.append(EkLayer(name, layer))

    # The interface id key follows the same 'frame_frame_*' naming as the other
    # frame fields. It was previously misspelled and therefore never found.
    return Packet(layers=layers, frame_info=EkLayer('frame', frame_dict),
                  number=int(frame_dict.get('frame_frame_number', 0)),
                  length=int(frame_dict['frame_frame_len']),
                  sniff_time=frame_dict['frame_frame_time_epoch'],
                  interface_captured=frame_dict.get('frame_frame_interface_id'))
tshark_version=None): 21 | super().__init__() 22 | self._tshark_version = tshark_version 23 | 24 | def _parse_single_packet(self, packet): 25 | json_has_duplicate_keys = tshark.tshark_supports_duplicate_keys(self._tshark_version) 26 | return packet_from_json_packet(packet, deduplicate_fields=json_has_duplicate_keys) 27 | 28 | def _extract_packet_from_data(self, data, got_first_packet=True): 29 | """Returns a packet's data and any remaining data after reading that first packet""" 30 | tag_start = 0 31 | if not got_first_packet: 32 | tag_start = data.find(b"{") 33 | if tag_start == -1: 34 | return None, data 35 | packet_separator, end_separator, end_tag_strip_length = self._get_json_separators() 36 | found_separator = None 37 | 38 | tag_end = data.find(packet_separator) 39 | if tag_end == -1: 40 | # Not end of packet, maybe it has end of entire file? 41 | tag_end = data.find(end_separator) 42 | if tag_end != -1: 43 | found_separator = end_separator 44 | else: 45 | # Found a single packet, just add the separator without extras 46 | found_separator = packet_separator 47 | 48 | if found_separator: 49 | tag_end += len(found_separator) - end_tag_strip_length 50 | return data[tag_start:tag_end].strip().strip(b","), data[tag_end + 1:] 51 | return None, data 52 | 53 | def _get_json_separators(self): 54 | """"Returns the separators between packets in a JSON output 55 | 56 | Returns a tuple of (packet_separator, end_of_file_separator, characters_to_disregard). 57 | The latter variable being the number of characters to ignore in order to pass the packet (i.e. extra newlines, 58 | commas, parenthesis). 
def duplicate_object_hook(ordered_pairs):
    """Make lists out of duplicate keys.

    Meant to be used as the ``object_pairs_hook`` of ``json.loads``.

    :param ordered_pairs: Iterable of (key, value) pairs in document order.
    :return: A dict in which a key seen once maps to its value. When a key
        repeats, later values are appended to the stored value if that is
        already a list; otherwise the stored value becomes
        ``[first_value, new_value]``.
    """
    json_dict = {}
    for key, val in ordered_pairs:
        # Test for presence, not truthiness: a first value such as "" or 0
        # must not be silently replaced by a later duplicate.
        if key not in json_dict:
            json_dict[key] = val
            continue

        existing_val = json_dict[key]
        if isinstance(existing_val, list):
            existing_val.append(val)
        else:
            json_dict[key] = [existing_val, val]

    return json_dict
class TsharkXmlParser(BaseTsharkOutputParser):
    """Parser for tshark XML output, optionally in packet-summary (PSML) mode."""
    # Number of bytes read per attempt while looking for the PSML structure.
    SUMMARIES_BATCH_SIZE = 64

    def __init__(self, parse_summaries=False):
        super().__init__()
        self._parse_summaries = parse_summaries
        self._psml_structure = None

    async def get_packets_from_stream(self, stream, existing_data, got_first_packet=True):
        """Reads one packet like the base class, fetching the PSML structure first if needed.

        When summaries are requested and no structure has been stored yet, the
        structure is read from the stream and ``existing_data`` is replaced by
        whatever was left over from that read.
        """
        structure_missing = self._psml_structure is None
        if self._parse_summaries and structure_missing:
            existing_data = await self._get_psml_struct(stream)
        return await super().get_packets_from_stream(
            stream, existing_data, got_first_packet=got_first_packet)

    def _parse_single_packet(self, packet):
        return packet_from_xml_packet(packet, psml_structure=self._psml_structure)

    def _extract_packet_from_data(self, data, got_first_packet=True):
        """Gets data containing a (part of) tshark xml.

        If a packet tag is found in it, returns the tag data and the remaining data.
        Otherwise returns None and the same data.

        :param data: string of a partial tshark xml.
        :return: a tuple of (tag, data). tag will be None if none is found.
        """
        return _extract_tag_from_xml_data(data, tag_name=b"packet")

    async def _get_psml_struct(self, fd):
        """Reads the PSML (packet summary xml) structure, which appears on top of the output.

        Keeps reading SUMMARIES_BATCH_SIZE bytes at a time until a "structure"
        tag can be extracted or a read returns no data. When found, the parsed
        structure is stored on ``self._psml_structure``.

        A coroutine.

        :return: the leftover data that was read but is not part of the structure tag.
        """
        buffered = b""
        while True:
            chunk = await fd.read(self.SUMMARIES_BATCH_SIZE)
            buffered += chunk
            structure_xml, buffered = _extract_tag_from_xml_data(buffered, b"structure")
            if structure_xml:
                self._psml_structure = psml_structure_from_xml(structure_xml)
                return buffered
            if not chunk:
                # Stream ended without a structure tag: hand back what was read.
                return buffered
def _packet_from_pdml_packet(pdml_packet):
    """Builds a Packet from a parsed PDML packet element.

    Every ``proto`` child becomes an XmlLayer. The first one supplies the
    general info (number, lengths, timestamp), the second one is used as the
    frame info, and the rest become the packet's layers.
    """
    parsed_protos = [XmlLayer(proto) for proto in pdml_packet.proto]
    geninfo = parsed_protos[0]
    frame = parsed_protos[1]
    return Packet(
        layers=parsed_protos[2:],
        frame_info=frame,
        number=geninfo.get_field_value('num'),
        length=geninfo.get_field_value('len'),
        sniff_time=geninfo.get_field_value('timestamp', raw=True),
        captured_length=geninfo.get_field_value('caplen'),
        interface_captured=frame.get_field_value('interface_id', raw=True),
    )
def get_tshark_interfaces(tshark_path=None):
    """Returns the interfaces listed in the output of tshark -D.

    Used internally to capture on multiple interfaces.

    :param tshark_path: Handed to get_process_path to locate the executable.
    :return: A list with the second space-separated token of every output
        line, skipping lines that contain the backslash-backslash-dot-backslash
        sequence tested below.
    """
    parameters = [get_process_path(tshark_path), "-D"]
    # subprocess.DEVNULL discards stderr without opening os.devnull by hand.
    tshark_interfaces = subprocess.check_output(
        parameters, stderr=subprocess.DEVNULL).decode("utf-8")

    return [line.split(" ")[1] for line in tshark_interfaces.splitlines() if '\\\\.\\' not in line]
Some interfaces may have aliases""" 125 | parameters = [get_process_path(tshark_path), "-D"] 126 | with open(os.devnull, "w") as null: 127 | tshark_interfaces = subprocess.check_output(parameters, stderr=null).decode("utf-8") 128 | 129 | all_interface_names = [] 130 | for line in tshark_interfaces.splitlines(): 131 | matches = _TSHARK_INTERFACE_ALIAS_PATTERN.findall(line) 132 | if matches: 133 | all_interface_names.extend([name for name in matches[0] if name]) 134 | return all_interface_names 135 | 136 | 137 | def get_ek_field_mapping(tshark_path=None): 138 | parameters = [get_process_path(tshark_path), "-G", "elastic-mapping"] 139 | with open(os.devnull, "w") as null: 140 | mapping = subprocess.check_output(parameters, stderr=null).decode("ascii") 141 | 142 | mapping = json.loads( 143 | mapping, 144 | object_pairs_hook=_duplicate_object_hook)["mappings"] 145 | # If using wireshark 4, the key "mapping" contains what we want, 146 | if "dynamic" in mapping and "properties" in mapping: 147 | pass 148 | # if using wireshark 3.5 to < 4 the data in "mapping.doc", 149 | elif "doc" in mapping: 150 | mapping = mapping["doc"] 151 | # or "mapping.pcap_file" if using wireshark < 3.5 152 | elif "pcap_file" in mapping: 153 | mapping = mapping["pcap_file"] 154 | else: 155 | raise TSharkVersionException(f"Your tshark version does not support elastic-mapping. 
Please upgrade.") 156 | 157 | return mapping["properties"]["layers"]["properties"] 158 | 159 | 160 | def _duplicate_object_hook(ordered_pairs): 161 | """Make lists out of duplicate keys.""" 162 | json_dict = {} 163 | for key, val in ordered_pairs: 164 | existing_val = json_dict.get(key) 165 | if not existing_val: 166 | json_dict[key] = val 167 | else: 168 | # There are duplicates without any data for some reason, if it's that - drop it 169 | # Otherwise, override 170 | if val.get("properties") != {}: 171 | json_dict[key] = val 172 | 173 | return json_dict 174 | -------------------------------------------------------------------------------- /src/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | with open(os.path.join(os.path.dirname(__file__), 'README.txt')) as f: 5 | long_description = f.read() 6 | 7 | setup( 8 | name="pyshark", 9 | version="0.6.1", 10 | packages=find_packages(), 11 | package_data={'': ['*.ini', '*.pcapng']}, 12 | install_requires=['lxml', 'termcolor', 'packaging', 'appdirs'], 13 | tests_require=['pytest'], 14 | url="https://github.com/KimiNewt/pyshark", 15 | license="MIT", 16 | long_description=long_description, 17 | author="KimiNewt", 18 | description="Python wrapper for tshark, allowing python packet parsing using wireshark dissectors", 19 | keywords="wireshark capture packets parsing packet", 20 | 21 | classifiers=[ 22 | 'License :: OSI Approved :: MIT License', 23 | 'Programming Language :: Python :: 3.7', 24 | 'Programming Language :: Python :: 3.8', 25 | 'Programming Language :: Python :: 3.9', 26 | 'Programming Language :: Python :: 3.10', 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /src/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{35,36,37,38,39,310} 3 | 4 | [testenv] 5 | deps = 6 | appdirs 7 | 
def test_capture_gets_multiple_decoding_parameters():
    """Both decode_as entries must show up, each right after its own '-d' flag."""
    capture = Capture(decode_as={'tcp.port==8888': 'http', 'tcp.port==6666': 'dns'})
    parameters = capture.get_parameters()
    remaining = ['tcp.port==8888,http', 'tcp.port==6666,dns']

    # The order of the two entries is not fixed, so accept either one first...
    first_flag = parameters.index('-d')
    first_value = parameters[first_flag + 1]
    assert first_value in remaining
    remaining.remove(first_value)

    # ...and require the other one after the next '-d'.
    second_flag = parameters.index('-d', first_flag + 1)
    assert parameters[second_flag + 1] == remaining[0]
override_actual_value in expected_results 41 | # increment index 42 | start_idx = override_index + 1 43 | 44 | 45 | def test_capture_gets_encryption_and_override_perfs(): 46 | temp_c = Capture() 47 | for valid_encryption_type in temp_c.SUPPORTED_ENCRYPTION_STANDARDS: 48 | c = Capture(decryption_key='helloworld', 49 | encryption_type=valid_encryption_type, 50 | override_prefs={'esp.enable_null_encryption_decode_heuristic': 'TRUE', 51 | 'wlan.enable_decryption': 'TRUE', 52 | 'uat:80211_keys': f'"{valid_encryption_type}","helloworld"'}) 53 | params = c.get_parameters() 54 | expected_results = ('esp.enable_null_encryption_decode_heuristic:TRUE', 55 | 'wlan.enable_decryption:TRUE', 56 | f'uat:80211_keys:"{valid_encryption_type}","helloworld"') 57 | start_idx = 0 58 | actual_parameter_options = [] 59 | while True: 60 | try: 61 | override_index = params.index('-o', start_idx) 62 | except ValueError: 63 | # no more '-o' options 64 | break 65 | override_actual_value = params[override_index +1] 66 | actual_parameter_options.append(override_actual_value) 67 | assert override_actual_value in expected_results 68 | # increment index 69 | start_idx = override_index + 1 70 | assert set(actual_parameter_options) == set(expected_results) 71 | assert len(actual_parameter_options) == len(expected_results) 72 | 73 | -------------------------------------------------------------------------------- /tests/capture/test_inmem_capture.py: -------------------------------------------------------------------------------- 1 | import binascii 2 | import pytest 3 | import pyshark 4 | 5 | 6 | @pytest.fixture 7 | def inmem_capture(): 8 | return pyshark.InMemCapture() 9 | 10 | 11 | def arp_packet(last_byte='f'): 12 | """ 13 | Returns an ARP packet from aa:bb:cc:dd:ee:fX 14 | """ 15 | p = f"ffffffffffffaabbccddeef{last_byte}0806000108000604000104a151c32ad10a0000020000000000000a000001" 16 | return binascii.unhexlify(p) 17 | 18 | 19 | def test_can_read_binary_packet(inmem_capture): 20 | pkt = 
inmem_capture.parse_packet(arp_packet('f')) 21 | inmem_capture.close() 22 | assert pkt.eth.src == 'aa:bb:cc:dd:ee:ff' 23 | 24 | 25 | def test_can_read_multiple_binary_packet(inmem_capture): 26 | pkts = inmem_capture.feed_packets([arp_packet('1'), arp_packet('2'), arp_packet('3')]) 27 | assert len(pkts) == 3 28 | 29 | for i, pkt in enumerate(pkts): 30 | assert pkt.eth.src == 'aa:bb:cc:dd:ee:f' + str(i + 1) 31 | 32 | def test_fed_packets_are_added_to_the_list(inmem_capture): 33 | inmem_capture.feed_packets([arp_packet()]) 34 | assert len(inmem_capture) == 1 35 | 36 | inmem_capture.feed_packets([arp_packet(), arp_packet()]) 37 | assert len(inmem_capture) == 3 38 | -------------------------------------------------------------------------------- /tests/capture/test_live_capture.py: -------------------------------------------------------------------------------- 1 | try: 2 | import mock 3 | except ModuleNotFoundError: 4 | from unittest import mock 5 | import pytest 6 | 7 | import pyshark 8 | 9 | 10 | @pytest.fixture(params=[["wlan0"], ["wlan0mon", "wlan1mon"]]) 11 | def interfaces(request): 12 | with mock.patch("pyshark.tshark.tshark.get_tshark_interfaces", return_value=request.param): 13 | yield request.param 14 | 15 | 16 | @pytest.fixture 17 | def capture(interfaces): 18 | return pyshark.LiveCapture(interface=interfaces) 19 | 20 | 21 | @pytest.mark.parametrize("monitoring", [True, False]) 22 | def test_get_dumpcap_interface_parameter(capture, monitoring, interfaces): 23 | # type: (pyshark.LiveCapture, bool, list) -> None 24 | capture.monitor_mode = monitoring 25 | dumpcap_parameters = capture._get_dumpcap_parameters() 26 | dumpcap_interfaces = [dumpcap_parameters[index + 1] 27 | for index, value in enumerate(dumpcap_parameters) 28 | if value == "-i"] 29 | assert dumpcap_interfaces == interfaces 30 | -------------------------------------------------------------------------------- /tests/conftest.py: 
-------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import pytest 4 | 5 | import pyshark 6 | 7 | 8 | @pytest.fixture 9 | def data_directory(): 10 | return pathlib.Path(__file__).parent.joinpath('data') 11 | 12 | 13 | @pytest.fixture 14 | def example_pcap_path(data_directory): 15 | return data_directory.joinpath('capture_test.pcapng') 16 | 17 | 18 | @pytest.fixture 19 | def lazy_simple_capture(example_pcap_path): 20 | with pyshark.FileCapture(example_pcap_path, debug=True) as pcap: 21 | yield pcap 22 | 23 | 24 | @pytest.fixture 25 | def simple_capture(lazy_simple_capture): 26 | """A capture already full of packets""" 27 | lazy_simple_capture.load_packets() 28 | return lazy_simple_capture 29 | 30 | 31 | @pytest.fixture 32 | def simple_summary_capture(example_pcap_path): 33 | with pyshark.FileCapture(example_pcap_path, debug=True, only_summaries=True) as pcap: 34 | yield pcap 35 | 36 | 37 | @pytest.fixture(params=[True, False]) 38 | def simple_xml_and_json_capture(request, example_pcap_path): 39 | with pyshark.FileCapture(example_pcap_path, debug=True, use_json=request.param) as pcap: 40 | yield pcap 41 | -------------------------------------------------------------------------------- /tests/data/capture_test.pcapng: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KimiNewt/pyshark/4517bdfed7a65ac9a6a31354f086e75ddd5dac3b/tests/data/capture_test.pcapng -------------------------------------------------------------------------------- /tests/data/ek_field_mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "ip": { 3 | "properties": { 4 | "ip_ip_version": { 5 | "type": "short" 6 | }, 7 | "ip_ip_hdr_len": { 8 | "type": "short" 9 | }, 10 | "ip_ip_dsfield": { 11 | "type": "short" 12 | }, 13 | "ip_ip_dsfield_dscp": { 14 | "type": "short" 15 | }, 16 | "ip_ip_dsfield_ecn": { 17 | "type": "short" 18 | }, 19 
| "ip_ip_tos": { 20 | "type": "short" 21 | }, 22 | "ip_ip_tos_precedence": { 23 | "type": "short" 24 | }, 25 | "ip_ip_tos_delay": { 26 | "type": "boolean" 27 | }, 28 | "ip_ip_tos_throughput": { 29 | "type": "boolean" 30 | }, 31 | "ip_ip_tos_reliability": { 32 | "type": "boolean" 33 | }, 34 | "ip_ip_tos_cost": { 35 | "type": "boolean" 36 | }, 37 | "ip_ip_len": { 38 | "type": "integer" 39 | }, 40 | "ip_ip_id": { 41 | "type": "integer" 42 | }, 43 | "ip_ip_dst": { 44 | "type": "ip" 45 | }, 46 | "ip_ip_src": { 47 | "type": "ip" 48 | }, 49 | "ip_ip_addr": { 50 | "type": "ip" 51 | }, 52 | "ip_ip_geoip_asnum": { 53 | "type": "long" 54 | }, 55 | "ip_ip_geoip_lat": { 56 | "type": "float" 57 | }, 58 | "ip_ip_geoip_lon": { 59 | "type": "float" 60 | }, 61 | "ip_ip_geoip_src_asnum": { 62 | "type": "long" 63 | }, 64 | "ip_ip_geoip_src_lat": { 65 | "type": "float" 66 | }, 67 | "ip_ip_geoip_src_lon": { 68 | "type": "float" 69 | }, 70 | "ip_ip_geoip_dst_asnum": { 71 | "type": "long" 72 | }, 73 | "ip_ip_geoip_dst_lat": { 74 | "type": "float" 75 | }, 76 | "ip_ip_geoip_dst_lon": { 77 | "type": "float" 78 | }, 79 | "ip_ip_flags": { 80 | "type": "short" 81 | }, 82 | "ip_ip_flags_sf": { 83 | "type": "boolean" 84 | }, 85 | "ip_ip_flags_rb": { 86 | "type": "boolean" 87 | }, 88 | "ip_ip_flags_df": { 89 | "type": "boolean" 90 | }, 91 | "ip_ip_flags_mf": { 92 | "type": "boolean" 93 | }, 94 | "ip_ip_frag_offset": { 95 | "type": "integer" 96 | }, 97 | "ip_ip_ttl": { 98 | "type": "short" 99 | }, 100 | "ip_ip_proto": { 101 | "type": "short" 102 | }, 103 | "ip_ip_checksum": { 104 | "type": "integer" 105 | }, 106 | "ip_ip_checksum_calculated": { 107 | "type": "integer" 108 | }, 109 | "ip_ip_checksum_status": { 110 | "type": "short" 111 | }, 112 | "ip_ip_opt_type": { 113 | "type": "short" 114 | }, 115 | "ip_ip_opt_type_copy": { 116 | "type": "boolean" 117 | }, 118 | "ip_ip_opt_type_class": { 119 | "type": "short" 120 | }, 121 | "ip_ip_opt_type_number": { 122 | "type": "short" 123 | }, 124 | 
"ip_ip_opt_len": { 125 | "type": "short" 126 | }, 127 | "ip_ip_opt_ptr": { 128 | "type": "short" 129 | }, 130 | "ip_ip_opt_sid": { 131 | "type": "integer" 132 | }, 133 | "ip_ip_opt_mtu": { 134 | "type": "integer" 135 | }, 136 | "ip_ip_opt_id_number": { 137 | "type": "integer" 138 | }, 139 | "ip_ip_opt_ohc": { 140 | "type": "integer" 141 | }, 142 | "ip_ip_opt_rhc": { 143 | "type": "integer" 144 | }, 145 | "ip_ip_opt_originator": { 146 | "type": "ip" 147 | }, 148 | "ip_ip_opt_ra": { 149 | "type": "integer" 150 | }, 151 | "ip_ip_opt_addr": { 152 | "type": "ip" 153 | }, 154 | "ip_ip_opt_padding": { 155 | "type": "byte" 156 | }, 157 | "ip_ip_opt_qs_func": { 158 | "type": "short" 159 | }, 160 | "ip_ip_opt_qs_rate": { 161 | "type": "short" 162 | }, 163 | "ip_ip_opt_qs_ttl": { 164 | "type": "short" 165 | }, 166 | "ip_ip_opt_qs_ttl_diff": { 167 | "type": "short" 168 | }, 169 | "ip_ip_opt_qs_unused": { 170 | "type": "short" 171 | }, 172 | "ip_ip_opt_qs_nonce": { 173 | "type": "long" 174 | }, 175 | "ip_ip_opt_qs_reserved": { 176 | "type": "long" 177 | }, 178 | "ip_ip_opt_sec_rfc791_sec": { 179 | "type": "short" 180 | }, 181 | "ip_ip_opt_sec_rfc791_comp": { 182 | "type": "integer" 183 | }, 184 | "ip_ip_opt_sec_cl": { 185 | "type": "short" 186 | }, 187 | "ip_ip_opt_sec_prot_auth_flags": { 188 | "type": "short" 189 | }, 190 | "ip_ip_opt_sec_prot_auth_genser": { 191 | "type": "boolean" 192 | }, 193 | "ip_ip_opt_sec_prot_auth_siop_esi": { 194 | "type": "boolean" 195 | }, 196 | "ip_ip_opt_sec_prot_auth_sci": { 197 | "type": "boolean" 198 | }, 199 | "ip_ip_opt_sec_prot_auth_nsa": { 200 | "type": "boolean" 201 | }, 202 | "ip_ip_opt_sec_prot_auth_doe": { 203 | "type": "boolean" 204 | }, 205 | "ip_ip_opt_sec_prot_auth_unassigned": { 206 | "type": "short" 207 | }, 208 | "ip_ip_opt_sec_prot_auth_unassigned": { 209 | "type": "short" 210 | }, 211 | "ip_ip_opt_sec_prot_auth_fti": { 212 | "type": "boolean" 213 | }, 214 | "ip_ip_opt_ext_sec_add_sec_info_format_code": { 215 | "type": "short" 
216 | }, 217 | "ip_ip_opt_ext_sec_add_sec_info": { 218 | "type": "byte" 219 | }, 220 | "ip_ip_rec_rt": { 221 | "type": "ip" 222 | }, 223 | "ip_ip_cur_rt": { 224 | "type": "ip" 225 | }, 226 | "ip_ip_src_rt": { 227 | "type": "ip" 228 | }, 229 | "ip_ip_empty_rt": { 230 | "type": "ip" 231 | }, 232 | "ip_ip_cipso_tag_type": { 233 | "type": "short" 234 | }, 235 | "ip_ip_fragment_overlap": { 236 | "type": "boolean" 237 | }, 238 | "ip_ip_fragment_overlap_conflict": { 239 | "type": "boolean" 240 | }, 241 | "ip_ip_fragment_multipletails": { 242 | "type": "boolean" 243 | }, 244 | "ip_ip_fragment_toolongfragment": { 245 | "type": "boolean" 246 | }, 247 | "ip_ip_fragment_error": { 248 | "type": "long" 249 | }, 250 | "ip_ip_fragment_count": { 251 | "type": "long" 252 | }, 253 | "ip_ip_fragment": { 254 | "type": "long" 255 | }, 256 | "ip_ip_fragments": { 257 | "type": "byte" 258 | }, 259 | "ip_ip_reassembled_in": { 260 | "type": "long" 261 | }, 262 | "ip_ip_reassembled_length": { 263 | "type": "long" 264 | }, 265 | "ip_ip_reassembled_data": { 266 | "type": "byte" 267 | }, 268 | "ip_ip_cipso_doi": { 269 | "type": "long" 270 | }, 271 | "ip_ip_cipso_sensitivity_level": { 272 | "type": "short" 273 | }, 274 | "ip_ip_cipso_tag_data": { 275 | "type": "byte" 276 | }, 277 | "ip_ip_opt_overflow": { 278 | "type": "short" 279 | }, 280 | "ip_ip_opt_flag": { 281 | "type": "short" 282 | }, 283 | "ip_ip_opt_time_stamp": { 284 | "type": "long" 285 | }, 286 | "ip_ip_opt_time_stamp_addr": { 287 | "type": "ip" 288 | } 289 | } 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /tests/data/packet.json: -------------------------------------------------------------------------------- 1 | { 2 | "_index": "packets-2020-03-26", 3 | "_type": "doc", 4 | "_score": null, 5 | "_source": { 6 | "layers": { 7 | "frame": { 8 | "frame.encap_type": "1", 9 | "frame.time": "Mar 26, 2020 13:03:01.863675000 IST", 10 | "frame.offset_shift": "0.000000000", 11 | 
"frame.time_epoch": "1585220581.863675000", 12 | "frame.time_delta": "0.000000000", 13 | "frame.time_delta_displayed": "0.000000000", 14 | "frame.time_relative": "0.000000000", 15 | "frame.number": "1", 16 | "frame.len": "118", 17 | "frame.cap_len": "118", 18 | "frame.marked": "0", 19 | "frame.ignored": "0", 20 | "frame.protocols": "eth:ethertype:ip:tcp:data" 21 | }, 22 | "eth": { 23 | "eth.dst": "10:13:31:cc:b7:70", 24 | "eth.dst_tree": { 25 | "eth.dst_resolved": "Technico_cc:b7:70", 26 | "eth.dst.oui": "1053489", 27 | "eth.dst.oui_resolved": "Technicolor", 28 | "eth.addr": "10:13:31:cc:b7:70", 29 | "eth.addr_resolved": "Technico_cc:b7:70", 30 | "eth.addr.oui": "1053489", 31 | "eth.addr.oui_resolved": "Technicolor", 32 | "eth.dst.lg": "0", 33 | "eth.lg": "0", 34 | "eth.dst.ig": "0", 35 | "eth.ig": "0" 36 | }, 37 | "eth.src": "08:6d:41:e4:c2:ba", 38 | "eth.src_tree": { 39 | "eth.src_resolved": "Apple_e4:c2:ba", 40 | "eth.src.oui": "552257", 41 | "eth.src.oui_resolved": "Apple, Inc.", 42 | "eth.addr": "08:6d:41:e4:c2:ba", 43 | "eth.addr_resolved": "Apple_e4:c2:ba", 44 | "eth.addr.oui": "552257", 45 | "eth.addr.oui_resolved": "Apple, Inc.", 46 | "eth.src.lg": "0", 47 | "eth.lg": "0", 48 | "eth.src.ig": "0", 49 | "eth.ig": "0" 50 | }, 51 | "eth.type": "0x00000800" 52 | }, 53 | "ip": { 54 | "ip.version": "4", 55 | "ip.hdr_len": "20", 56 | "ip.dsfield": "0x00000010", 57 | "ip.dsfield_tree": { 58 | "ip.dsfield.dscp": "4", 59 | "ip.dsfield.ecn": "0" 60 | }, 61 | "ip.len": "104", 62 | "ip.id": "0x00007742", 63 | "ip.flags": "0x00000040", 64 | "ip.flags_tree": { 65 | "ip.flags.rb": "0", 66 | "ip.flags.df": "1", 67 | "ip.flags.mf": "0" 68 | }, 69 | "ip.frag_offset": "0", 70 | "ip.ttl": "64", 71 | "ip.proto": "6", 72 | "ip.checksum": "0x00003006", 73 | "ip.checksum.status": "1", 74 | "ip.checksum_calculated": "0x00003006", 75 | "ip.src": "192.168.1.180", 76 | "ip.addr": "192.168.1.180", 77 | "ip.src_host": "192.168.1.180", 78 | "ip.host": "192.168.1.180", 79 | "ip.dst": 
"3.224.204.251", 80 | "ip.addr": "3.224.204.251", 81 | "ip.dst_host": "3.224.204.251", 82 | "ip.host": "3.224.204.251" 83 | }, 84 | "tcp": { 85 | "tcp.srcport": "38570", 86 | "tcp.dstport": "2222", 87 | "tcp.port": "38570", 88 | "tcp.port": "2222", 89 | "tcp.stream": "0", 90 | "tcp.len": "52", 91 | "tcp.seq": "1", 92 | "tcp.seq_raw": "3950550451", 93 | "tcp.nxtseq": "53", 94 | "tcp.ack": "1", 95 | "tcp.ack_raw": "1547145221", 96 | "tcp.hdr_len": "32", 97 | "tcp.flags": "0x00000018", 98 | "tcp.flags_tree": { 99 | "tcp.flags.res": "0", 100 | "tcp.flags.ns": "0", 101 | "tcp.flags.cwr": "0", 102 | "tcp.flags.ecn": "0", 103 | "tcp.flags.urg": "0", 104 | "tcp.flags.ack": "1", 105 | "tcp.flags.push": "1", 106 | "tcp.flags.reset": "0", 107 | "tcp.flags.syn": "0", 108 | "tcp.flags.fin": "0", 109 | "tcp.flags.str": "·······AP···" 110 | }, 111 | "tcp.window_size_value": "501", 112 | "tcp.window_size": "501", 113 | "tcp.window_size_scalefactor": "-1", 114 | "tcp.checksum": "0x0000b71f", 115 | "tcp.checksum.status": "1", 116 | "tcp.checksum_calculated": "0x0000b71f", 117 | "tcp.urgent_pointer": "0", 118 | "tcp.options": "01:01:08:0a:db:09:8b:13:15:7a:89:e7", 119 | "tcp.options_tree": { 120 | "tcp.options.nop": "01", 121 | "tcp.options.nop_tree": { 122 | "tcp.option_kind": "1" 123 | }, 124 | "tcp.options.nop": "01", 125 | "tcp.options.nop_tree": { 126 | "tcp.option_kind": "1" 127 | }, 128 | "tcp.options.timestamp": "08:0a:db:09:8b:13:15:7a:89:e7", 129 | "tcp.options.timestamp_tree": { 130 | "tcp.option_kind": "8", 131 | "tcp.option_len": "10", 132 | "tcp.options.timestamp.tsval": "3674835731", 133 | "tcp.options.timestamp.tsecr": "360352231" 134 | } 135 | }, 136 | "tcp.analysis": { 137 | "tcp.analysis.bytes_in_flight": "52", 138 | "tcp.analysis.push_bytes_sent": "52" 139 | }, 140 | "Timestamps": { 141 | "tcp.time_relative": "0.000000000", 142 | "tcp.time_delta": "0.000000000" 143 | }, 144 | "tcp.payload": 
"43:f0:5d:02:7c:f3:26:59:3f:ef:d5:0d:81:c1:6b:e8:57:9e:3a:b2:18:42:a6:24:1c:74:2e:83:2d:d4:97:46:ee:25:6d:bb:5d:f8:71:00:79:94:7e:db:75:45:b7:dd:f8:45:1f:91" 145 | }, 146 | "data": { 147 | "data.data": "43:f0:5d:02:7c:f3:26:59:3f:ef:d5:0d:81:c1:6b:e8:57:9e:3a:b2:18:42:a6:24:1c:74:2e:83:2d:d4:97:46:ee:25:6d:bb:5d:f8:71:00:79:94:7e:db:75:45:b7:dd:f8:45:1f:91", 148 | "data.len": "52" 149 | } 150 | } 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /tests/data/packet.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /tests/data/packet_ek.json: -------------------------------------------------------------------------------- 1 | 
{"timestamp":"1585220581863","layers":{"frame":{"frame_frame_encap_type":"1","frame_frame_time":"2020-03-26T11:03:01.863675000Z","frame_frame_offset_shift":"0.000000000","frame_frame_time_epoch":"1585220581.863675000","frame_frame_time_delta":"0.000000000","frame_frame_time_delta_displayed":"0.000000000","frame_frame_time_relative":"0.000000000","frame_frame_number":"1","frame_frame_len":"118","frame_frame_cap_len":"118","frame_frame_marked":false,"frame_frame_ignored":false,"frame_frame_protocols":"eth:ethertype:ip:tcp:data"},"eth":{"eth_eth_dst":"10:13:31:cc:b7:70","eth_eth_dst_resolved":"Technico_cc:b7:70","eth_eth_dst_oui":"1053489","eth_eth_dst_oui_resolved":"Technicolor","eth_eth_addr":"10:13:31:cc:b7:70","eth_eth_addr_resolved":"Technico_cc:b7:70","eth_eth_addr_oui":"1053489","eth_eth_addr_oui_resolved":"Technicolor","eth_eth_dst_lg":false,"eth_eth_lg":false,"eth_eth_dst_ig":false,"eth_eth_ig":false,"eth_eth_src":"08:6d:41:e4:c2:ba","eth_eth_src_resolved":"Apple_e4:c2:ba","eth_eth_src_oui":"552257","eth_eth_src_oui_resolved":"Apple, Inc.","eth_eth_addr":"08:6d:41:e4:c2:ba","eth_eth_addr_resolved":"Apple_e4:c2:ba","eth_eth_addr_oui":"552257","eth_eth_addr_oui_resolved":"Apple, 
Inc.","eth_eth_src_lg":false,"eth_eth_lg":false,"eth_eth_src_ig":false,"eth_eth_ig":false,"eth_eth_type":"0x00000800"},"ip":{"ip_ip_version":"4","ip_ip_hdr_len":"20","ip_ip_dsfield":"0x00000010","ip_ip_dsfield_dscp":"4","ip_ip_dsfield_ecn":"0","ip_ip_len":"104","ip_ip_id":"0x00007742","ip_ip_flags":"0x00000040","ip_ip_flags_rb":false,"ip_ip_flags_df":true,"ip_ip_flags_mf":false,"ip_ip_frag_offset":"0","ip_ip_ttl":"64","ip_ip_proto":"6","ip_ip_checksum":"0x00003006","ip_ip_checksum_status":"1","ip_ip_checksum_calculated":"0x00003006","ip_ip_src":"192.168.1.180","ip_ip_addr":["192.168.1.180","3.224.204.251"],"ip_ip_src_host":"192.168.1.180","ip_ip_host":["192.168.1.180","3.224.204.251"],"ip_ip_dst":"3.224.204.251","ip_ip_dst_host":"3.224.204.251"},"tcp":{"tcp_tcp_srcport":"38570","tcp_tcp_dstport":"2222","tcp_tcp_port":["38570","2222"],"tcp_tcp_stream":"0","tcp_tcp_len":"52","tcp_tcp_seq":"1","tcp_tcp_seq_raw":"3950550451","tcp_tcp_nxtseq":"53","tcp_tcp_ack":"1","tcp_tcp_ack_raw":"1547145221","tcp_tcp_hdr_len":"32","tcp_tcp_flags":"0x00000018","tcp_tcp_flags_res":false,"tcp_tcp_flags_ns":false,"tcp_tcp_flags_cwr":false,"tcp_tcp_flags_ecn":false,"tcp_tcp_flags_urg":false,"tcp_tcp_flags_ack":true,"tcp_tcp_flags_push":true,"tcp_tcp_flags_reset":false,"tcp_tcp_flags_syn":false,"tcp_tcp_flags_fin":false,"tcp_tcp_flags_str":"·······AP···","tcp_tcp_window_size_value":"501","tcp_tcp_window_size":"501","tcp_tcp_window_size_scalefactor":"-1","tcp_tcp_checksum":"0x0000b71f","tcp_tcp_checksum_status":"1","tcp_tcp_checksum_calculated":"0x0000b71f","tcp_tcp_urgent_pointer":"0","tcp_tcp_options":"01:01:08:0a:db:09:8b:13:15:7a:89:e7","tcp_options_nop":["01","01"],"tcp_tcp_option_kind":["1","1"],"tcp_options_timestamp":"08:0a:db:09:8b:13:15:7a:89:e7","tcp_tcp_option_kind":"8","tcp_tcp_option_len":"10","tcp_tcp_options_timestamp_tsval":"3674835731","tcp_tcp_options_timestamp_tsecr":"360352231","tcp_tcp_analysis":null,"tcp_tcp_analysis_bytes_in_flight":"52","tcp_tcp_analysis_push_bytes_
sent":"52","text":"Timestamps","tcp_tcp_time_relative":"0.000000000","tcp_tcp_time_delta":"0.000000000","tcp_tcp_payload":"43:f0:5d:02:7c:f3:26:59:3f:ef:d5:0d:81:c1:6b:e8:57:9e:3a:b2:18:42:a6:24:1c:74:2e:83:2d:d4:97:46:ee:25:6d:bb:5d:f8:71:00:79:94:7e:db:75:45:b7:dd:f8:45:1f:91"},"data":{"data_data_data":"43:f0:5d:02:7c:f3:26:59:3f:ef:d5:0d:81:c1:6b:e8:57:9e:3a:b2:18:42:a6:24:1c:74:2e:83:2d:d4:97:46:ee:25:6d:bb:5d:f8:71:00:79:94:7e:db:75:45:b7:dd:f8:45:1f:91","data_data_len":"52"}}} 2 | -------------------------------------------------------------------------------- /tests/packet/test_fields.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyshark.packet.fields import LayerField, LayerFieldsContainer 4 | 5 | # Unit Tests - Layer Fields 6 | def test_layer_field_hide(): 7 | hide_yes = LayerField(hide="yes") 8 | hide_not_yes = LayerField(hide="not yes") 9 | assert hide_yes.hide == True and hide_not_yes.hide == False 10 | 11 | def test_layer_field_print_format(): 12 | name = "test_name" 13 | show = "test" 14 | test_layer_field = LayerField(name=name, show=show) 15 | str_layer_field = test_layer_field.__repr__() 16 | assert str_layer_field == f'' 17 | 18 | def test_layer_field_default_value_show(): 19 | show = "test" 20 | test_layer_field = LayerField(show=show) 21 | default_value = test_layer_field.get_default_value() 22 | assert default_value == show 23 | 24 | def test_layer_field_default_value_value(): 25 | value = "test" 26 | test_layer_field = LayerField(value=value) 27 | default_value = test_layer_field.get_default_value() 28 | assert default_value == value 29 | 30 | def test_layer_field_default_value_showname(): 31 | showname = "test" 32 | test_layer_field = LayerField(showname=showname) 33 | default_value = test_layer_field.get_default_value() 34 | assert default_value == showname 35 | 36 | def test_layer_field_showname_value(): 37 | value = "test_value" 38 | showname = f"test: {value}" 39 | 
test_layer_field = LayerField(showname=showname) 40 | assert value == test_layer_field.showname_value 41 | 42 | def test_layer_field_showname_key(): 43 | key = "test_key" 44 | showname = f"{key}: value" 45 | test_layer_field = LayerField(showname=showname) 46 | assert key == test_layer_field.showname_key 47 | 48 | def test_layer_field_showname_binary(): 49 | binary = b"\x124" 50 | test_layer_field = LayerField(value="1234") 51 | assert binary == test_layer_field.binary_value 52 | 53 | def test_layer_field_str_int_value(): 54 | str_int_value = "10" 55 | int_value = 10 56 | test_layer_field = LayerField(value=str_int_value) 57 | assert test_layer_field.int_value == int_value 58 | 59 | def test_layer_field_hex_value(): 60 | test_int_value = "0x75BCD15" 61 | expected_value = 123456789 62 | test_layer_field = LayerField(value=test_int_value) 63 | assert test_layer_field.hex_value == expected_value 64 | 65 | # Test Data - Layer Fields Container 66 | test_layer_field = LayerField(name="test_field", value={"test": "value"}) 67 | 68 | @pytest.fixture 69 | def layer_fields_container(): 70 | return LayerFieldsContainer(test_layer_field) 71 | 72 | # Unit Tests - Layer Fields Container 73 | def test_layer_fields_container_adds_single_field(layer_fields_container): 74 | new_field = LayerField(name="new_field", value={"new_test": "new_test_value"}) 75 | layer_fields_container.add_field(new_field) 76 | fields = layer_fields_container.all_fields 77 | assert fields == [test_layer_field, new_field] 78 | 79 | def test_layer_fields_container_returns_all_fields(layer_fields_container): 80 | fields = layer_fields_container.all_fields 81 | assert fields == [test_layer_field] 82 | 83 | def test_layer_fields_container_gets_main_field(layer_fields_container): 84 | main_field = layer_fields_container.main_field 85 | assert main_field == test_layer_field 86 | 87 | def test_layer_fields_container_gets_alternate_fields(layer_fields_container): 88 | alternate_field_1 =
LayerField(name="alt_field_1", value={"alt_field_1": "alt_value_1"}) 89 | alternate_field_2 = LayerField(name="alt_field_2", value={"alt_field_2": "alt_value_2"}) 90 | layer_fields_container.add_field(alternate_field_1) 91 | layer_fields_container.add_field(alternate_field_2) 92 | alternate_fields = layer_fields_container.alternate_fields 93 | assert alternate_fields == [alternate_field_1, alternate_field_2] 94 | -------------------------------------------------------------------------------- /tests/test_basic_parsing.py: -------------------------------------------------------------------------------- 1 | def test_count_packets(simple_xml_and_json_capture): 2 | """Test to make sure the right number of packets are read from a known 3 | capture""" 4 | packet_count = sum(1 for _ in simple_xml_and_json_capture) 5 | assert packet_count == 24 6 | 7 | 8 | def test_sum_lengths(simple_xml_and_json_capture): 9 | """Test to make sure that the right packet length is being read from 10 | tshark's output by comparing the aggregate length of all packets 11 | to a known value""" 12 | total_length = sum(int(packet.length) for packet in simple_xml_and_json_capture) 13 | assert total_length == 2178 14 | 15 | 16 | def test_layers(simple_xml_and_json_capture): 17 | """Test to make sure the correct protocols are reported for known 18 | packets""" 19 | packet_indexes = (0, 5, 6, 13, 14, 17, 23) 20 | test_values = [simple_xml_and_json_capture[i].highest_layer for i in packet_indexes] 21 | known_values = ['DNS', 'DNS', 'ICMP', 'ICMP', 'TCP', 'HTTP', 'TCP'] 22 | assert test_values == known_values 23 | 24 | 25 | def test_ethernet(simple_xml_and_json_capture): 26 | """Test to make sure Ethernet fields are being read properly by comparing 27 | packet dissection results to known values""" 28 | packet = simple_xml_and_json_capture[0] 29 | test_values = packet.eth.src, packet.eth.dst 30 | known_values = ('00:00:bb:10:20:10', '00:00:bb:02:04:01') 31 | assert test_values == known_values 32 | 33 | 
34 | def test_icmp(simple_xml_and_json_capture): 35 | """Test to make sure ICMP fields are being read properly by comparing 36 | packet dissection results to known values""" 37 | packet = simple_xml_and_json_capture[11] 38 | # The value returned by tshark is locale-dependent. 39 | # Depending on the locale, a comma can be used instead of a dot 40 | # as decimal separator. 41 | resptime = packet.icmp.resptime.replace(',', '.') 42 | assert resptime == '1.667' 43 | -------------------------------------------------------------------------------- /tests/test_cap_operations.py: -------------------------------------------------------------------------------- 1 | import time 2 | from asyncio import TimeoutError 3 | from multiprocessing import Process 4 | from multiprocessing import Queue 5 | from multiprocessing.queues import Empty 6 | from unittest import mock 7 | 8 | import pytest 9 | 10 | from pyshark.packet.packet_summary import PacketSummary 11 | 12 | 13 | def test_packet_callback_called_for_each_packet(lazy_simple_capture): 14 | # Test cap has 24 packets 15 | mock_callback = mock.Mock() 16 | lazy_simple_capture.apply_on_packets(mock_callback) 17 | assert mock_callback.call_count == 24 18 | 19 | 20 | def test_async_packet_callback_called_for_each_packet(lazy_simple_capture): 21 | # Test cap has 24 packets 22 | mock_callback = mock.AsyncMock() 23 | lazy_simple_capture.apply_on_packets(mock_callback) 24 | assert mock_callback.call_count == 24 25 | mock_callback.assert_awaited() 26 | 27 | 28 | def test_apply_on_packet_stops_on_timeout(lazy_simple_capture): 29 | def wait(pkt): 30 | time.sleep(5) 31 | with pytest.raises(TimeoutError): 32 | lazy_simple_capture.apply_on_packets(wait, timeout=1) 33 | 34 | 35 | def test_lazy_loading_of_packets_on_getitem(lazy_simple_capture): 36 | # Seventh packet is ICMP 37 | assert 'ICMP' in lazy_simple_capture[6] 38 | 39 | 40 | def test_lazy_loading_of_packet_does_not_recreate_packets(lazy_simple_capture): 41 | # Seventh packet is ICMP 42 | 
icmp_packet_id = id(lazy_simple_capture[6]) 43 | # load some more 44 | lazy_simple_capture[8] 45 | assert icmp_packet_id == id(lazy_simple_capture[6]) 46 | 47 | 48 | def test_filling_cap_in_increments(lazy_simple_capture): 49 | lazy_simple_capture.load_packets(1) 50 | assert len(lazy_simple_capture) == 1 51 | lazy_simple_capture.load_packets(2) 52 | assert len(lazy_simple_capture) == 3 53 | 54 | 55 | def test_getting_packet_summary(simple_summary_capture): 56 | assert isinstance(simple_summary_capture[0], PacketSummary) 57 | 58 | # Since we cannot check the exact fields since they're dependent on wireshark configuration, 59 | # we'll at least make sure some data is in. 60 | assert simple_summary_capture[0]._fields 61 | 62 | 63 | def _iterate_capture_object(cap_obj, q): 64 | for _ in cap_obj: 65 | pass 66 | q.put(True) 67 | 68 | 69 | def test_iterate_empty_psml_capture(simple_summary_capture): 70 | simple_summary_capture.display_filter = "frame.len == 1" 71 | q = Queue() 72 | p = Process(target=_iterate_capture_object, 73 | args=(simple_summary_capture, q)) 74 | p.start() 75 | p.join(2) 76 | try: 77 | no_hang = q.get_nowait() 78 | except Empty: 79 | no_hang = False 80 | if p.is_alive(): 81 | p.terminate() 82 | assert no_hang # False here 83 | -------------------------------------------------------------------------------- /tests/test_ek_field_mapping.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from unittest import mock 3 | 4 | import pytest 5 | 6 | from pyshark import ek_field_mapping 7 | 8 | 9 | @pytest.fixture(autouse=True) 10 | def fake_cache(tmp_path): 11 | with mock.patch.object(ek_field_mapping, "cache") as fake_cache_module: 12 | # Direct to the data directory which has the mapping for IP. 
13 | fake_cache_module.get_cache_dir.return_value = pathlib.Path(__file__).parent.joinpath("data") 14 | yield fake_cache_module.get_cache_dir 15 | 16 | 17 | @pytest.fixture 18 | def mapping(): 19 | ek_field_mapping.MAPPING.load_mapping("foo") 20 | yield ek_field_mapping.MAPPING 21 | ek_field_mapping.MAPPING.clear() 22 | 23 | 24 | @pytest.mark.parametrize(["field_name", "expected_type"], [ 25 | ("ip_ip_hdr_len", int), 26 | ("ip_ip_src_rt", str), 27 | ("ip_ip_geoip_lat", float), 28 | ("ip_ip_tos_reliability", str), 29 | ("ip_ip_reassembled_data", bytes), 30 | ("missing_field", str), 31 | ]) 32 | def test_can_find_field_type_in_mapping(mapping, field_name, expected_type): 33 | assert mapping.get_field_type("ip", field_name) == expected_type 34 | 35 | 36 | @pytest.mark.parametrize(["field_name", "str_value", "casted_value"], [ 37 | ("ip_ip_hdr_len", "20", 20), 38 | ("ip_ip_src_rt", "1.1.1.1", "1.1.1.1"), 39 | ("ip_ip_geoip_lat", "15.5", 15.5), 40 | ("ip_ip_tos_reliability", "foo", "foo"), 41 | ("ip_ip_checksum", "0x3006", 0x3006), 42 | ("ip_ip_checksum", ["0x3006", "0x5"], [0x3006, 0x5]), 43 | ("ip_ip_reassembled_data", "ff:e0", b"\xff\xe0"), 44 | ]) 45 | def test_casts_field_value_to_correct_value(mapping, field_name, str_value, casted_value): 46 | assert mapping.cast_field_value("ip", field_name, str_value) == casted_value 47 | 48 | 49 | def test_doesnt_cast_non_str(mapping): 50 | assert mapping.cast_field_value("ip", "ip_ip_hdr_len", True) is True 51 | 52 | 53 | -------------------------------------------------------------------------------- /tests/test_packet_operations.py: -------------------------------------------------------------------------------- 1 | import binascii 2 | import pytest 3 | 4 | 5 | @pytest.fixture 6 | def icmp_packet(simple_capture): 7 | return simple_capture[7] 8 | 9 | 10 | @pytest.mark.parametrize('access_func', [ 11 | lambda pkt: pkt[-1], 12 | lambda pkt: pkt['icmp'], 13 | lambda pkt: pkt['ICMP'], 14 | lambda pkt: pkt.icmp, 15 | ]) 16 | def 
test_can_access_layer(icmp_packet, access_func): 17 | """Tests that layer access in various ways works the same way.""" 18 | assert access_func(icmp_packet).layer_name.upper() == 'ICMP' 19 | assert binascii.unhexlify(access_func(icmp_packet).data) == b'abcdefghijklmnopqrstuvwabcdefghi' 20 | 21 | 22 | def test_packet_contains_layer(icmp_packet): 23 | assert 'ICMP' in icmp_packet 24 | 25 | 26 | def test_raw_mode(icmp_packet): 27 | original = icmp_packet.ip.src 28 | raw = icmp_packet.ip.src.raw_value 29 | icmp_packet.ip.raw_mode = True 30 | assert icmp_packet.ip.src != original 31 | assert icmp_packet.ip.src == raw 32 | 33 | 34 | def test_frame_info_access(icmp_packet): 35 | actual = icmp_packet.frame_info.protocols 36 | expected = set(['eth:ip:icmp:data', 'eth:ethertype:ip:icmp:data']) 37 | assert actual in expected 38 | assert icmp_packet.frame_info.number == '8' 39 | -------------------------------------------------------------------------------- /tests/tshark/test_tshark.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from unittest import mock 3 | from packaging import version 4 | 5 | import pytest 6 | 7 | from pyshark.tshark import tshark 8 | 9 | 10 | LINUX_INTERFACES_OUTPUT = b""" 11 | 10. br-15 12 | 11. any 13 | 12. lo (Loopback) 14 | 13. br-cc""" 15 | 16 | WINDOWS_INTERFACES_OUTPUT = rb""" 17 | 1. \Device\NPF_{1} (foo) 18 | 2. \Device\NPF_{2} (bar)""" 19 | 20 | 21 | @pytest.fixture 22 | def mock_check_output(): 23 | with mock.patch.object(subprocess, "check_output") as mock_check_output: 24 | yield mock_check_output 25 | 26 | 27 | @pytest.mark.parametrize(["tshark_output", "expected_interface_names"], 28 | [ 29 | (b"foo", []), 30 | (b"1. foo\n2. bar\n3. baz", ["foo", "bar", "baz"]), 31 | (b"1. foo\n2. bar (derp)\n3. 
baz", ["foo", "bar", "baz", "derp"]), 32 | (LINUX_INTERFACES_OUTPUT, ["br-15", "any", "lo", "Loopback", "br-cc"]), 33 | (WINDOWS_INTERFACES_OUTPUT, 34 | [r"\Device\NPF_{1}", r"\Device\NPF_{2}", "foo", "bar"]) 35 | ] 36 | ) 37 | def test_can_get_all_interface_names_and_aliases(mock_check_output, tshark_output, expected_interface_names): 38 | mock_check_output.return_value = tshark_output 39 | assert set(tshark.get_all_tshark_interfaces_names()) == set(expected_interface_names) 40 | 41 | 42 | @mock.patch('os.path.exists', autospec=True) 43 | def test_get_tshark_path(mock_exists): 44 | mock_exists.return_value = True 45 | actual = tshark.get_process_path("/some/path/tshark") 46 | expected = "/some/path/tshark" 47 | assert actual == expected 48 | 49 | 50 | @mock.patch('subprocess.check_output', autospec=True) 51 | def test_get_tshark_version(mock_check_output): 52 | mock_check_output.return_value = ( 53 | b'TShark 1.12.1 (Git Rev Unknown from unknown)\n\n'b'Copyright ' 54 | b'1998-2014 Gerald Combs and contributors.\n' 55 | ) 56 | actual = tshark.get_tshark_version() 57 | expected = version.parse('1.12.1') 58 | assert actual == expected 59 | 60 | 61 | def test_get_display_filter_flag(): 62 | actual = tshark.get_tshark_display_filter_flag(version.parse('1.10.0')) 63 | expected = '-Y' 64 | assert actual == expected 65 | 66 | actual = tshark.get_tshark_display_filter_flag(version.parse('1.6.0')) 67 | expected = '-R' 68 | assert actual == expected 69 | 70 | 71 | @mock.patch('subprocess.check_output', autospec=True) 72 | def test_get_tshark_interfaces(mock_check_output): 73 | mock_check_output.return_value = ( 74 | b'1. wlan0\n2. any\n3. lo (Loopback)\n4. eth0\n5. 
docker0\n' 75 | ) 76 | actual = tshark.get_tshark_interfaces() 77 | expected = ['wlan0', 'any', 'lo', 'eth0', 'docker0'] 78 | assert actual == expected 79 | 80 | -------------------------------------------------------------------------------- /tests/tshark/test_tshark_ek.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pyshark import ek_field_mapping 4 | from pyshark.tshark import tshark 5 | from pyshark.tshark.output_parser import tshark_ek 6 | 7 | 8 | @pytest.fixture 9 | def parsed_packet(data_directory): 10 | ek_field_mapping.MAPPING.load_mapping(str(tshark.get_tshark_version())) 11 | return tshark_ek.packet_from_ek_packet(data_directory.joinpath("packet_ek.json").read_bytes()) 12 | 13 | 14 | def test_can_access_simple_field(parsed_packet): 15 | assert parsed_packet.tcp.checksum.value == 0x0000b71f 16 | 17 | 18 | def test_can_access_subfield(parsed_packet): 19 | assert parsed_packet.tcp.flags.ack is True 20 | 21 | 22 | def test_can_access_subfield_by_dot_notations(parsed_packet): 23 | assert parsed_packet.tcp.get_field("flags.ack") is True 24 | 25 | 26 | def test_can_parse_duplicate_fields(parsed_packet): 27 | assert parsed_packet.tcp.options.timestamp.tsecr == 360352231 28 | assert parsed_packet.tcp.options.nop == ["01", "01"] 29 | 30 | 31 | def test_gets_layer_field_names(parsed_packet): 32 | assert set(parsed_packet.tcp.field_names) == {"checksum", 33 | "nxtseq", 34 | "flags", 35 | "dstport", 36 | "ack", 37 | "stream", 38 | "port", 39 | "seq", 40 | "srcport", 41 | "urgent", 42 | "option", 43 | "analysis", 44 | "options", 45 | "window", 46 | "payload", 47 | "len", 48 | "time", 49 | "hdr"} 50 | 51 | 52 | def test_gets_field_subfield_names(parsed_packet): 53 | assert set(parsed_packet.tcp.options.timestamp.subfields) == {"tsecr", "tsval"} 54 | -------------------------------------------------------------------------------- /tests/tshark/test_tshark_json.py: 
# --------------------------------------------------------------------------------
import pytest

from pyshark.tshark.output_parser import tshark_json


@pytest.fixture
def parsed_packet(data_directory):
    """Return the packet built from the ``packet.json`` test data file.

    The file is read as bytes from the directory supplied by the
    ``data_directory`` fixture and handed to
    ``tshark_json.packet_from_json_packet``.
    """
    raw_packet = data_directory.joinpath("packet.json").read_bytes()
    return tshark_json.packet_from_json_packet(raw_packet)


def test_can_access_simple_field(parsed_packet):
    """The TCP checksum field compares equal to its string form."""
    assert parsed_packet.tcp.checksum == "0x0000b71f"


def test_can_access_subfield(parsed_packet):
    """A field nested under ``flags_tree`` is reachable by attribute access."""
    assert parsed_packet.tcp.flags_tree.ack == "1"


def test_can_duplicate_fields(parsed_packet):
    """A field that occurs twice is exposed as a list of its values."""
    tcp_options = parsed_packet.tcp.options_tree
    assert tcp_options.nop == ["01", "01"]
    assert tcp_options.timestamp_tree.option_kind == "8"


# --------------------------------------------------------------------------------
# /tests/tshark/test_tshark_xml.py:
# --------------------------------------------------------------------------------
import pytest

from pyshark.tshark.output_parser import tshark_xml


@pytest.fixture
def parsed_packet(data_directory):
    """Return the packet built from the ``packet.xml`` test data file.

    The file is read as bytes from the directory supplied by the
    ``data_directory`` fixture and handed to
    ``tshark_xml.packet_from_xml_packet``.
    """
    raw_packet = data_directory.joinpath("packet.xml").read_bytes()
    return tshark_xml.packet_from_xml_packet(raw_packet)


def test_can_access_simple_field(parsed_packet):
    """The TCP checksum field compares equal to its string form."""
    assert parsed_packet.tcp.checksum == "0x0000b71f"


def test_can_access_field_showname(parsed_packet):
    """The checksum field exposes its ``showname`` text."""
    assert parsed_packet.tcp.checksum.showname == "Checksum: 0xb71f [correct]"


def test_can_access_raw_field(parsed_packet):
    """The checksum field exposes its ``raw_value``."""
    assert parsed_packet.tcp.checksum.raw_value == "b71f"


def test_can_access_subfield(parsed_packet):
    """The ``flags_ack`` field is reachable directly on the TCP layer."""
    assert parsed_packet.tcp.flags_ack == "1"


def test_can_duplicate_fields(parsed_packet):
    """Every occurrence of a repeated field is reachable via ``all_fields``."""
    all_tcp_opts = parsed_packet.tcp.option_kind.all_fields
    option_kinds = {opt.get_default_value() for opt in all_tcp_opts}
    # Compared as a set, so only the distinct option kinds are checked; the
    # number of occurrences and their order are not asserted here.
    assert option_kinds == {"1", "8"}


# --------------------------------------------------------------------------------