├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING ├── FUNDING.yml ├── ISSUE_TEMPLATE │ └── bug_report.md ├── PULL_REQUEST_TEMPLATE.md ├── delete-merged-branch-config.yml └── workflows │ └── python-package.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── _autogen.txt ├── _gen.py ├── conf.py ├── extract_msg.attachments.custom_att_handler.rst ├── extract_msg.attachments.rst ├── extract_msg.constants.rst ├── extract_msg.encoding.rst ├── extract_msg.msg_classes.rst ├── extract_msg.properties.rst ├── extract_msg.rst ├── extract_msg.structures.rst ├── index.rst ├── make.bat ├── modules.rst ├── type-support.csv └── type_support.rst ├── example-msg-files ├── expected-outputs │ ├── 2013-11-18_0026 Test for TIF files │ │ ├── import OleFileIO.tif │ │ ├── message.text │ │ └── raised value error.tif │ └── 2016-02-23_0657 MSG Test File │ │ └── message.text ├── export-results │ ├── strangeDate.msg │ └── unicode.msg ├── strangeDate.msg └── unicode.msg ├── extract_msg ├── __init__.py ├── __main__.py ├── _rtf │ ├── __init__.py │ ├── create_doc.py │ ├── inject_rtf.py │ ├── token.py │ └── tokenize_rtf.py ├── attachments │ ├── __init__.py │ ├── attachment.py │ ├── attachment_base.py │ ├── broken_att.py │ ├── custom_att.py │ ├── custom_att_handler │ │ ├── __init__.py │ │ ├── custom_handler.py │ │ ├── lnk_obj_att.py │ │ ├── outlook_image_dib.py │ │ └── outlook_image_meta.py │ ├── emb_msg_att.py │ ├── signed_att.py │ ├── unsupported_att.py │ └── web_att.py ├── constants │ ├── __init__.py │ ├── ps.py │ ├── re.py │ └── st.py ├── data │ └── logging-config │ │ ├── logging-nt.json │ │ └── logging-posix.json ├── encoding │ ├── __init__.py │ ├── _dt │ │ ├── __init__.py │ │ ├── _mac_ce.py │ │ ├── _mac_cyrillic.py │ │ ├── _mac_greek.py │ │ ├── _mac_iceland.py │ │ ├── _mac_turkish.py │ │ ├── _win874_dec.py │ │ └── _win950_dec.py │ └── utils.py ├── enums.py ├── exceptions.py ├── msg_classes │ ├── __init__.py │ 
├── appointment.py │ ├── calendar.py │ ├── calendar_base.py │ ├── contact.py │ ├── journal.py │ ├── meeting_cancellation.py │ ├── meeting_exception.py │ ├── meeting_forward.py │ ├── meeting_related.py │ ├── meeting_request.py │ ├── meeting_response.py │ ├── message.py │ ├── message_base.py │ ├── message_signed.py │ ├── message_signed_base.py │ ├── msg.py │ ├── post.py │ ├── sticky_note.py │ ├── task.py │ └── task_request.py ├── null_date.py ├── ole_writer.py ├── open_msg.py ├── properties │ ├── __init__.py │ ├── named.py │ ├── prop.py │ └── properties_store.py ├── py.typed ├── recipient.py ├── structures │ ├── __init__.py │ ├── _helpers.py │ ├── business_card.py │ ├── cfoas.py │ ├── contact_link_entry.py │ ├── dev_mode_a.py │ ├── dv_target_device.py │ ├── entry_id.py │ ├── misc_id.py │ ├── mon_stream.py │ ├── odt.py │ ├── ole_pres.py │ ├── ole_stream_struct.py │ ├── recurrence_pattern.py │ ├── report_tag.py │ ├── system_time.py │ ├── time_zone_definition.py │ ├── time_zone_struct.py │ ├── toc_entry.py │ └── tz_rule.py └── utils.py ├── extract_msg_tests ├── __init__.py ├── attachment_tests.py ├── cmd_line_tests.py ├── constants.py ├── ole_writer_tests.py ├── prop_tests.py ├── util_tests.py └── validation_tests.py ├── helper-scripts ├── README.md ├── detect-prop-overlap.py.old └── produce-dec-table.py ├── msg-documentation ├── [MS-OXMSG].pdf └── source.txt ├── notes ├── Custom Attachment CLSIDs.txt ├── README.md └── contact business card details.txt ├── pyrightconfig.json ├── readthedocs.yaml ├── requirements.txt ├── setup.cfg ├── setup.py ├── templates ├── logging-nt.json └── logging-posix.json └── tests.py /.github/CONTRIBUTING: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ** This is a draft document** 4 | 5 | When contributing to this repository, please first discuss the change you wish to make via issue, 6 | email, or any other method with the owners of this repository before making a change. 
7 | 8 | Please note that we have a [code of conduct](CODE_OF_CONDUCT.md); please follow it in all your interactions with the project. 9 | 10 | ## Pull Request Process 11 | 12 | 1. Ensure an issue already exists for what you are fixing and link to it in your pull request. 13 | 2. Ensure any install or build dependencies are removed before the end of the layer when doing a 14 | build. 15 | 3. Ensure your pull request does not target the master branch, which is for final releases only. 16 | 4. Update the [CHANGELOG](CHANGELOG.md) with details of changes to the codebase; this includes new functionality, 17 | deprecated features, or any other material changes. 18 | 5. Increase the version numbers in any example files and the README.rst to the new version that this 19 | Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/). (Usually done by the 20 | maintainers.) 21 | 6. Submit tests with your pull request. 22 | 7. You may merge the Pull Request in once you have the sign-off of one or more other developers, or if you 23 | do not have permission to do that, you may request the second reviewer to merge it for you. 24 | 25 | ## Code of Conduct 26 | 27 | ### Our Pledge 28 | 29 | In the interest of fostering an open and welcoming environment, we as 30 | contributors and maintainers pledge to making participation in our project and 31 | our community a harassment-free experience for everyone, regardless of age, body 32 | size, disability, ethnicity, gender identity and expression, level of experience, 33 | nationality, personal appearance, race, religion, or sexual identity and 34 | orientation. 
35 | 36 | ### Our Standards 37 | 38 | Examples of behavior that contributes to creating a positive environment 39 | include: 40 | 41 | * Using welcoming and inclusive language 42 | * Being respectful of differing viewpoints and experiences 43 | * Gracefully accepting constructive criticism 44 | * Focusing on what is best for the community 45 | * Showing empathy towards other community members 46 | 47 | Examples of unacceptable behavior by participants include: 48 | 49 | * The use of sexualized language or imagery and unwelcome sexual attention or 50 | advances 51 | * Trolling, insulting/derogatory comments, and personal or political attacks 52 | * Public or private harassment 53 | * Publishing others' private information, such as a physical or electronic 54 | address, without explicit permission 55 | * Other conduct which could reasonably be considered inappropriate in a 56 | professional setting 57 | 58 | ### Our Responsibilities 59 | 60 | Project maintainers are responsible for clarifying the standards of acceptable 61 | behavior and are expected to take appropriate and fair corrective action in 62 | response to any instances of unacceptable behavior. 63 | 64 | Project maintainers have the right and responsibility to remove, edit, or 65 | reject comments, commits, code, wiki edits, issues, and other contributions 66 | that are not aligned to this Code of Conduct, or to ban temporarily or 67 | permanently any contributor for other behaviors that they deem inappropriate, 68 | threatening, offensive, or harmful. 69 | 70 | See our [Code of Conduct](CODE_OF_CONDUCT.md) for more info. 
71 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: DestructionE 2 | patreon: DestructionE 3 | custom: ['https://www.buymeacoffee.com/DestructionE'] 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | In order to get your bug addressed in a timely manner, or at all :smiley:, please fill out the bug report below. Please try to make it as easy as possible for us to understand what is going on. We may close out any bugs or issues without warning that are not complete or coherent. 8 | 9 | In the bug template below anything in [square brackets] should be filled out or removed if the item doesn't apply. 10 | 11 | Should you encounter an error that has not already been reported, please do the following when reporting it: 12 | **Bug Metadata** 13 | * Version of extract_msg: [x.x.x] 14 | * Your python version: Python [2.7 | 3.6.7] 15 | * How did you launch extract_msg? 16 | - [ ] My command line or 17 | - [ ] I used the extract_msg package 18 | 19 | **Describe the bug** 20 | A clear and concise description of what the bug is. 21 | 22 | [ If applicable ] 23 | **What code did you use or can we use to reproduce this error?** 24 | 25 | ``` 26 | [put your code here] 27 | ``` 28 | 29 | Is there a message.msg file you want to share to help us reproduce this? 
30 | - [ ] Uploaded message (drag and drop on this window) 31 | - [ ] Emailed message as an attachment to admins: [Enter Subject Line Here] 32 | 33 | **Traceback** 34 | ``` 35 | [Put your traceback here] 36 | ``` 37 | 38 | **Screenshots** 39 | [Insert any screenshots or debug pictures here] 40 | 41 | **Additional context** 42 | [Add any other context about the problem here.] 43 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | - [ ] Issue #: (Replace text with #) 2 | - [ ] Have you listed any changes to install or build dependencies? 3 | - [ ] Ensured your changes are compatible with Python 2.7 (ONLY FOR v0.29). 4 | - [ ] Have you updated the [CHANGELOG](CHANGELOG.md) with details of changes to the codebase, this includes new functionality, deprecated features, or any other material changes. 5 | - [ ] If necessary, have you bumped the version number? We will usually do this for you. 6 | - [ ] Have you included py.test tests with your pull request. (Not yet necessary) 7 | - [ ] Ensured your code is as close to PEP 8 compliant as possible? 8 | - [ ] Ensured your pull request is to the `next-release` branch (or `v0.29` if applicable)? 9 | 10 | If you haven't completed the above items, please wait to create a PR until you have done so. We will try to review and reply to PRs as quickly as possible. 11 | 12 | Once your PR is approved by a maintainer, we will either merge it into next-release or do a release with you or for you. 13 | 14 | Thanks for contributing! 
15 | -------------------------------------------------------------------------------- /.github/delete-merged-branch-config.yml: -------------------------------------------------------------------------------- 1 | exclude: 2 | - next-release -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | pull_request: 8 | branches: [ "master" ] 9 | 10 | jobs: 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | python -m pip install flake8 pytest 29 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 30 | - name: Lint with flake8 31 | run: | 32 | # stop the build if there are Python syntax errors or undefined names 33 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 34 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 35 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 36 | - name: Test with pytest 37 | run: | 38 | python tests.py 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac 2 | .DS_Store 3 | 4 | # PyCharm stuff 5 | /extract_msg/.idea 6 | 7 | # VS Code stuff. 8 | /.vscode/ 9 | 10 | # Build files and folders 11 | /build/ 12 | *.egg-info/ 13 | /dist/ 14 | /extract_msg.egg-info/ 15 | .idea/* 16 | # Ignore pypi creds file 17 | .pypirc 18 | 19 | # Runtime artifacts 20 | *.pyc 21 | *.pyo 22 | __pycache__/ 23 | 24 | # Ignore new .msg files added from testing 25 | /example-msg-files/expected-outputs/ 26 | /example-msg-files/*.msg 27 | 28 | # Preserve some of our example files 29 | !/example-msg-files/expected-outputs/2013-11-18_0026 Test for TIF files/ 30 | !/example-msg-files/expected-outputs/2013-11-18_0026 Test for TIF files/* 31 | !/example-msg-files/expected-outputs/2016-02-23_0657 MSG Test File/ 32 | !/example-msg-files/expected-outputs/2016-02-23_0657 MSG Test File/* 33 | !/example-msg-files/strangeDate.msg 34 | !/example-msg-files/unicode.msg 35 | 36 | # Reserved Folders 37 | /output 38 | /raw 39 | 40 | # End user logging 41 | logging.json 42 | 43 | # Developer files 44 | upload.bat 45 | upload.sh 46 | venv 47 | 48 | # Sphinx documentation 49 | docs/_build/ 50 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.8" 4 | install: 5 | - python setup.py install 6 | script: 7 | - python tests.py 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include MANIFEST.in 3 | include README.md 4 | 
include LICENSE.txt 5 | include CHANGELOG.md 6 | recursive-include extract_msg * 7 | recursive-exclude * *.py[co] 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON = python 2 | ARGS = 3 | 4 | # In not in a virtualenv, add --user options for install commands. 5 | INSTALL_OPTS = `$(PYTHON) -c "import sys; print('' if hasattr(sys, 'real_prefix') else '--user')"` 6 | 7 | install: ## Install this package as current user in "edit" mode. 8 | $(PYTHON) setup.py develop $(INSTALL_OPTS) 9 | 10 | test: ## Run tests. 11 | $(PYTHON) tests.py 12 | 13 | upload: ## Upload source tarball on PYPI. Requires a .pypirc file in the home dir. 14 | $(PYTHON) setup.py sdist upload 15 | 16 | clean: ## Remove all build files. 17 | rm -rf `find . -type d -name __pycache__ \ 18 | -o -type f -name \*.bak \ 19 | -o -type f -name \*.orig \ 20 | -o -type f -name \*.pyc \ 21 | -o -type f -name \*.pyd \ 22 | -o -type f -name \*.pyo \ 23 | -o -type f -name \*.rej \ 24 | -o -type f -name \*.so \ 25 | -o -type f -name \*.~ \ 26 | -o -type f -name \*\$testfn` 27 | rm -rf \ 28 | *.core \ 29 | *.egg-info \ 30 | *\$testfn* \ 31 | .coverage \ 32 | .tox \ 33 | build/ \ 34 | dist/ \ 35 | docs/_build/ \ 36 | htmlcov/ \ 37 | venv \ 38 | 2013-11-18_1026* \ 39 | tmp/ 40 | 41 | help: ## Display callable targets. 42 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' 43 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 
"""
Helper script for generating the necessary RST files.
"""

import os
import pathlib

from typing import Dict, Iterable, List, NamedTuple, Tuple


DIRECTORY = pathlib.Path(__file__).parent


class Package(NamedTuple):
    """
    A class representing one package (or subpackage) of a project.
    """
    # Dotted names of the submodules directly inside the package. Names
    # produced by readProject keep their ".py" suffix; generateFile strips it.
    modules: List[str]
    # Dotted names of the subpackages directly inside the package.
    packages: List[str]



def _readProject(root) -> Dict[str, Dict[str, bool]]:
    """
    Searches a project for Python files, using their locations to create a
    dictionary of module paths and their submodules/subpackages.

    Submodules/subpackages will be a dictionary, where the key is the name and
    the value is True for a subpackage and False for a submodule. Submodule
    names keep their ".py" extension. Files and directories whose names start
    with an underscore are ignored.
    """
    root = pathlib.Path(root)
    rootName = root.name
    ret: Dict[str, Dict[str, bool]] = {rootName: {}}
    # Sort the paths so the result (and every file generated from it) does not
    # depend on the order the file system happens to list entries in.
    for x in sorted(root.glob('**/*.py')):
        # Ignore internal files.
        if x.name.startswith('_'):
            continue

        # Get all parent components below the root, outermost first.
        parents = list(x.parent.relative_to(root).parts)

        # Check if any of the parents start with an underscore. If they do,
        # ignore the current path.
        if any(y.startswith('_') for y in parents):
            continue

        parents.insert(0, rootName)

        # Add the subpackages and submodules.
        for index, name in enumerate(parents[1:]):
            path = '.'.join(parents[:index + 1])
            ret.setdefault(path, {}).setdefault(name, True)
        ret.setdefault('.'.join(parents), {})[x.name] = False

    return ret


def _makePackage(name: str, data: Dict[str, bool]) -> Package:
    """
    Converts one entry of the dictionary from _readProject into a Package,
    prefixing each child with the dotted name of its parent package.
    """
    modules = [f'{name}.{x}' for x in data if not data[x]]
    packages = [f'{name}.{x}' for x in data if data[x]]
    return Package(modules, packages)


def run():
    """
    Deletes the previously generated RST files, generates a new file for each
    package of the project, and records the new file names in _autogen.txt.
    """
    for x in getAutoGenerated():
        try:
            os.remove(DIRECTORY / x)
        except FileNotFoundError:
            # The file was already removed by hand, which is fine since it
            # is about to be regenerated anyways.
            pass
    project = readProject(DIRECTORY.parent / 'extract_msg')
    for x, y in project.items():
        generateFile(x, y)

    writeAutoGenerated(x + '.rst' for x in project)


def generateFile(name: str, package: Package):
    """
    Writes the RST file for a package into the docs directory.

    :param name: The dotted name of the package. Used for the file name, the
        header, and the automodule directive of the package itself.
    :param package: The submodules and subpackages to list in the file.
    """
    with open(DIRECTORY / (name + '.rst'), 'w', encoding = 'utf-8') as f:
        # Header. Underscores are escaped so RST doesn't try to interpret them.
        temp = name.replace('_', '\\_') + ' package'
        f.write(f'{temp}\n{"=" * len(temp)}\n\n')

        # Subpackages.
        if package.packages:
            f.write('Subpackages\n-----------\n\n')
            f.write('.. toctree::\n')
            f.write('   :maxdepth: 4\n\n')
            f.write('   ' + '\n   '.join(package.packages))
            f.write('\n\n')

        # Submodules.
        if package.modules:
            f.write('Submodules\n----------\n\n')
            for module in package.modules:
                # Module names may still have the extension of their file.
                if module.endswith('.py'):
                    module = module[:-3]
                temp = module.replace('_', '\\_') + ' module'
                f.write(f'{temp}\n{"-" * len(temp)}\n\n')
                f.write(f'.. automodule:: {module}\n')
                f.write('   :members:\n')
                f.write('   :undoc-members:\n')
                f.write('   :show-inheritance:\n\n')

        # Module contents.
        f.write('Module contents\n---------------\n\n')
        f.write(f'.. automodule:: {name}\n')
        f.write('   :members:\n')
        f.write('   :undoc-members:\n')
        f.write('   :show-inheritance:\n')


def getAutoGenerated() -> List[str]:
    """
    Retrieves the list of previously autogenerated files.

    Blank lines are skipped, as an empty name would resolve to the docs
    directory itself. Returns an empty list if _autogen.txt does not exist.
    """
    try:
        with open(DIRECTORY / '_autogen.txt', 'r', encoding = 'utf-8') as f:
            return [name for name in (x.strip() for x in f) if name]
    except FileNotFoundError:
        # Nothing has been generated yet.
        return []


def readProject(root) -> Dict[str, Package]:
    """
    Returns a dictionary of package names to Package instances for a project.
    """
    initialRead = _readProject(root)
    return {x: _makePackage(x, y) for x, y in initialRead.items()}


def writeAutoGenerated(files : Iterable[str]) -> None:
    """
    Writes the _autogen.txt file, one file name per line.
    """
    with open(DIRECTORY / '_autogen.txt', 'w', encoding = 'utf-8') as f:
        f.write('\n'.join(files))


if __name__ == '__main__':
    run()
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | import os 10 | import sys 11 | 12 | sys.path.insert(0, os.path.abspath('..')) 13 | 14 | __author__ = 'Destiny Peterson & Matthew Walker' 15 | __version__ = '0.46.0' 16 | __year__ = '2023' 17 | 18 | 19 | project = 'extract-msg Documentation' 20 | copyright = f'{__year__}, {__author__}' 21 | author = __author__ 22 | release = __version__ 23 | 24 | # -- General configuration --------------------------------------------------- 25 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 26 | 27 | extensions = ['sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc'] 28 | 29 | templates_path = ['_templates'] 30 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '_autogen.txt'] 31 | 32 | # -- Options for HTML output ------------------------------------------------- 33 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 34 | 35 | html_theme = 'sphinx_rtd_theme' 36 | html_static_path = ['_static'] 37 | -------------------------------------------------------------------------------- /docs/extract_msg.attachments.custom_att_handler.rst: -------------------------------------------------------------------------------- 1 | extract\_msg.attachments.custom\_att\_handler package 2 | ===================================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | extract\_msg.attachments.custom\_att\_handler.custom\_handler module 8 | -------------------------------------------------------------------- 9 | 10 | .. 
automodule:: extract_msg.attachments.custom_att_handler.custom_handler 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | extract\_msg.attachments.custom\_att\_handler.lnk\_obj\_att module 16 | ------------------------------------------------------------------ 17 | 18 | .. automodule:: extract_msg.attachments.custom_att_handler.lnk_obj_att 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | extract\_msg.attachments.custom\_att\_handler.outlook\_image\_dib module 24 | ------------------------------------------------------------------------ 25 | 26 | .. automodule:: extract_msg.attachments.custom_att_handler.outlook_image_dib 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: extract_msg.attachments.custom_att_handler 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/extract_msg.attachments.rst: -------------------------------------------------------------------------------- 1 | extract\_msg.attachments package 2 | ================================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | extract_msg.attachments.custom_att_handler 11 | 12 | Submodules 13 | ---------- 14 | 15 | extract\_msg.attachments.attachment module 16 | ------------------------------------------ 17 | 18 | .. automodule:: extract_msg.attachments.attachment 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | extract\_msg.attachments.attachment\_base module 24 | ------------------------------------------------ 25 | 26 | .. automodule:: extract_msg.attachments.attachment_base 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | extract\_msg.attachments.broken\_att module 32 | ------------------------------------------- 33 | 34 | .. 
automodule:: extract_msg.attachments.broken_att 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | extract\_msg.attachments.custom\_att module 40 | ------------------------------------------- 41 | 42 | .. automodule:: extract_msg.attachments.custom_att 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | extract\_msg.attachments.emb\_msg\_att module 48 | --------------------------------------------- 49 | 50 | .. automodule:: extract_msg.attachments.emb_msg_att 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | extract\_msg.attachments.signed\_att module 56 | ------------------------------------------- 57 | 58 | .. automodule:: extract_msg.attachments.signed_att 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | extract\_msg.attachments.unsupported\_att module 64 | ------------------------------------------------ 65 | 66 | .. automodule:: extract_msg.attachments.unsupported_att 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | extract\_msg.attachments.web\_att module 72 | ---------------------------------------- 73 | 74 | .. automodule:: extract_msg.attachments.web_att 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | Module contents 80 | --------------- 81 | 82 | .. automodule:: extract_msg.attachments 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | -------------------------------------------------------------------------------- /docs/extract_msg.constants.rst: -------------------------------------------------------------------------------- 1 | extract\_msg.constants package 2 | ============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | extract\_msg.constants.ps module 8 | -------------------------------- 9 | 10 | .. automodule:: extract_msg.constants.ps 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | extract\_msg.constants.re module 16 | -------------------------------- 17 | 18 | .. 
automodule:: extract_msg.constants.re 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | extract\_msg.constants.st module 24 | -------------------------------- 25 | 26 | .. automodule:: extract_msg.constants.st 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: extract_msg.constants 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/extract_msg.encoding.rst: -------------------------------------------------------------------------------- 1 | extract\_msg.encoding package 2 | ============================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | extract\_msg.encoding.utils module 8 | ---------------------------------- 9 | 10 | .. automodule:: extract_msg.encoding.utils 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: extract_msg.encoding 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/extract_msg.msg_classes.rst: -------------------------------------------------------------------------------- 1 | extract\_msg.msg\_classes package 2 | ================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | extract\_msg.msg\_classes.appointment module 8 | -------------------------------------------- 9 | 10 | .. automodule:: extract_msg.msg_classes.appointment 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | extract\_msg.msg\_classes.calendar module 16 | ----------------------------------------- 17 | 18 | .. automodule:: extract_msg.msg_classes.calendar 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | extract\_msg.msg\_classes.calendar\_base module 24 | ----------------------------------------------- 25 | 26 | .. 
automodule:: extract_msg.msg_classes.calendar_base 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | extract\_msg.msg\_classes.contact module 32 | ---------------------------------------- 33 | 34 | .. automodule:: extract_msg.msg_classes.contact 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | extract\_msg.msg\_classes.journal module 40 | ---------------------------------------- 41 | 42 | .. automodule:: extract_msg.msg_classes.journal 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | extract\_msg.msg\_classes.meeting\_cancellation module 48 | ------------------------------------------------------ 49 | 50 | .. automodule:: extract_msg.msg_classes.meeting_cancellation 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | extract\_msg.msg\_classes.meeting\_exception module 56 | --------------------------------------------------- 57 | 58 | .. automodule:: extract_msg.msg_classes.meeting_exception 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | extract\_msg.msg\_classes.meeting\_forward module 64 | ------------------------------------------------- 65 | 66 | .. automodule:: extract_msg.msg_classes.meeting_forward 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | extract\_msg.msg\_classes.meeting\_related module 72 | ------------------------------------------------- 73 | 74 | .. automodule:: extract_msg.msg_classes.meeting_related 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | extract\_msg.msg\_classes.meeting\_request module 80 | ------------------------------------------------- 81 | 82 | .. automodule:: extract_msg.msg_classes.meeting_request 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | extract\_msg.msg\_classes.meeting\_response module 88 | -------------------------------------------------- 89 | 90 | .. 
automodule:: extract_msg.msg_classes.meeting_response 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | extract\_msg.msg\_classes.message module 96 | ---------------------------------------- 97 | 98 | .. automodule:: extract_msg.msg_classes.message 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | extract\_msg.msg\_classes.message\_base module 104 | ---------------------------------------------- 105 | 106 | .. automodule:: extract_msg.msg_classes.message_base 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | extract\_msg.msg\_classes.message\_signed module 112 | ------------------------------------------------ 113 | 114 | .. automodule:: extract_msg.msg_classes.message_signed 115 | :members: 116 | :undoc-members: 117 | :show-inheritance: 118 | 119 | extract\_msg.msg\_classes.message\_signed\_base module 120 | ------------------------------------------------------ 121 | 122 | .. automodule:: extract_msg.msg_classes.message_signed_base 123 | :members: 124 | :undoc-members: 125 | :show-inheritance: 126 | 127 | extract\_msg.msg\_classes.msg module 128 | ------------------------------------ 129 | 130 | .. automodule:: extract_msg.msg_classes.msg 131 | :members: 132 | :undoc-members: 133 | :show-inheritance: 134 | 135 | extract\_msg.msg\_classes.post module 136 | ------------------------------------- 137 | 138 | .. automodule:: extract_msg.msg_classes.post 139 | :members: 140 | :undoc-members: 141 | :show-inheritance: 142 | 143 | extract\_msg.msg\_classes.sticky\_note module 144 | --------------------------------------------- 145 | 146 | .. automodule:: extract_msg.msg_classes.sticky_note 147 | :members: 148 | :undoc-members: 149 | :show-inheritance: 150 | 151 | extract\_msg.msg\_classes.task module 152 | ------------------------------------- 153 | 154 | .. 
automodule:: extract_msg.msg_classes.task 155 | :members: 156 | :undoc-members: 157 | :show-inheritance: 158 | 159 | extract\_msg.msg\_classes.task\_request module 160 | ---------------------------------------------- 161 | 162 | .. automodule:: extract_msg.msg_classes.task_request 163 | :members: 164 | :undoc-members: 165 | :show-inheritance: 166 | 167 | Module contents 168 | --------------- 169 | 170 | .. automodule:: extract_msg.msg_classes 171 | :members: 172 | :undoc-members: 173 | :show-inheritance: 174 | -------------------------------------------------------------------------------- /docs/extract_msg.properties.rst: -------------------------------------------------------------------------------- 1 | extract\_msg.properties package 2 | =============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | extract\_msg.properties.named module 8 | ------------------------------------ 9 | 10 | .. automodule:: extract_msg.properties.named 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | extract\_msg.properties.prop module 16 | ----------------------------------- 17 | 18 | .. automodule:: extract_msg.properties.prop 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | extract\_msg.properties.properties\_store module 24 | ------------------------------------------------ 25 | 26 | .. automodule:: extract_msg.properties.properties_store 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: extract_msg.properties 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/extract_msg.rst: -------------------------------------------------------------------------------- 1 | extract\_msg package 2 | ==================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | :maxdepth: 4 9 | 10 | extract_msg.attachments 11 | extract_msg.constants 12 | extract_msg.encoding 13 | extract_msg.msg_classes 14 | extract_msg.properties 15 | extract_msg.structures 16 | 17 | Submodules 18 | ---------- 19 | 20 | extract\_msg.enums module 21 | ------------------------- 22 | 23 | .. automodule:: extract_msg.enums 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | extract\_msg.exceptions module 29 | ------------------------------ 30 | 31 | .. automodule:: extract_msg.exceptions 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | extract\_msg.ole\_writer module 37 | ------------------------------- 38 | 39 | .. automodule:: extract_msg.ole_writer 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | extract\_msg.open\_msg module 45 | ----------------------------- 46 | 47 | .. automodule:: extract_msg.open_msg 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | extract\_msg.recipient module 53 | ----------------------------- 54 | 55 | .. automodule:: extract_msg.recipient 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | 60 | extract\_msg.utils module 61 | ------------------------- 62 | 63 | .. automodule:: extract_msg.utils 64 | :members: 65 | :undoc-members: 66 | :show-inheritance: 67 | 68 | Module contents 69 | --------------- 70 | 71 | .. automodule:: extract_msg 72 | :members: 73 | :undoc-members: 74 | :show-inheritance: 75 | -------------------------------------------------------------------------------- /docs/extract_msg.structures.rst: -------------------------------------------------------------------------------- 1 | extract\_msg.structures package 2 | =============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | extract\_msg.structures.business\_card module 8 | --------------------------------------------- 9 | 10 | .. 
automodule:: extract_msg.structures.business_card 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | extract\_msg.structures.cfoas module 16 | ------------------------------------ 17 | 18 | .. automodule:: extract_msg.structures.cfoas 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | extract\_msg.structures.contact\_link\_entry module 24 | --------------------------------------------------- 25 | 26 | .. automodule:: extract_msg.structures.contact_link_entry 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | extract\_msg.structures.dev\_mode\_a module 32 | ------------------------------------------- 33 | 34 | .. automodule:: extract_msg.structures.dev_mode_a 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | extract\_msg.structures.dv\_target\_device module 40 | ------------------------------------------------- 41 | 42 | .. automodule:: extract_msg.structures.dv_target_device 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | extract\_msg.structures.entry\_id module 48 | ---------------------------------------- 49 | 50 | .. automodule:: extract_msg.structures.entry_id 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | extract\_msg.structures.misc\_id module 56 | --------------------------------------- 57 | 58 | .. automodule:: extract_msg.structures.misc_id 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | extract\_msg.structures.mon\_stream module 64 | ------------------------------------------ 65 | 66 | .. automodule:: extract_msg.structures.mon_stream 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | extract\_msg.structures.odt module 72 | ---------------------------------- 73 | 74 | .. automodule:: extract_msg.structures.odt 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | extract\_msg.structures.ole\_pres module 80 | ---------------------------------------- 81 | 82 | .. 
automodule:: extract_msg.structures.ole_pres 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | extract\_msg.structures.ole\_stream\_struct module 88 | -------------------------------------------------- 89 | 90 | .. automodule:: extract_msg.structures.ole_stream_struct 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | extract\_msg.structures.recurrence\_pattern module 96 | -------------------------------------------------- 97 | 98 | .. automodule:: extract_msg.structures.recurrence_pattern 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | extract\_msg.structures.report\_tag module 104 | ------------------------------------------ 105 | 106 | .. automodule:: extract_msg.structures.report_tag 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | extract\_msg.structures.system\_time module 112 | ------------------------------------------- 113 | 114 | .. automodule:: extract_msg.structures.system_time 115 | :members: 116 | :undoc-members: 117 | :show-inheritance: 118 | 119 | extract\_msg.structures.time\_zone\_definition module 120 | ----------------------------------------------------- 121 | 122 | .. automodule:: extract_msg.structures.time_zone_definition 123 | :members: 124 | :undoc-members: 125 | :show-inheritance: 126 | 127 | extract\_msg.structures.time\_zone\_struct module 128 | ------------------------------------------------- 129 | 130 | .. automodule:: extract_msg.structures.time_zone_struct 131 | :members: 132 | :undoc-members: 133 | :show-inheritance: 134 | 135 | extract\_msg.structures.toc\_entry module 136 | ----------------------------------------- 137 | 138 | .. automodule:: extract_msg.structures.toc_entry 139 | :members: 140 | :undoc-members: 141 | :show-inheritance: 142 | 143 | extract\_msg.structures.tz\_rule module 144 | --------------------------------------- 145 | 146 | .. 
automodule:: extract_msg.structures.tz_rule 147 | :members: 148 | :undoc-members: 149 | :show-inheritance: 150 | 151 | Module contents 152 | --------------- 153 | 154 | .. automodule:: extract_msg.structures 155 | :members: 156 | :undoc-members: 157 | :show-inheritance: 158 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. extract-msg documentation master file, created by 2 | sphinx-quickstart on Thu Feb 2 16:41:25 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to extract-msg's documentation! 7 | ======================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | modules 14 | type_support 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | msg-extractor 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | extract_msg 8 | -------------------------------------------------------------------------------- /docs/type-support.csv: -------------------------------------------------------------------------------- 1 | **Message Class**,**Class Name**,**Support Level**,**Support Added**,**Saving Added**,**Saving Completed** 2 | IPM,``MSGFile``,Open,0.29.0,Won't Add, 3 | REPORT.*,,,,, 4 | IPM.Note.*,``Message``,Full,0.29.0,0.29.0,0.35.0 5 | IPM.Activity.*,``Journal``,Full,0.46.0,0.46.0,0.46.0 6 | IPM.Appointment.*,``AppointmentMeeting`` (``Appointment`` before 0.35.0),Full,0.29.0,0.35.0,0.35.0 7 | IPM.Contact.*,,,,, 8 | IPM.Distlist.*,``Contact``,Full,0.29.0,0.35.0,0.35.0 9 | IPM.Note.*.SMIME,,,,, 10 | IPM.Note.*.SMIME.MultipartSigned,``MessageSigned``,Full,0.31.0,0.31.0,0.35.0 11 | IPM.Post.*,``Post``,Full,0.35.0,0.35.0,0.35.0 12 | IPM.Schedule.Meeting.Cancelation.*,``MeetingCancellation``,Full,0.35.0,0.35.0,0.35.0 13 | IPM.Schedule.Meeting.Request.*,``MeetingRequest``,Full,0.35.0,0.35.0,0.35.0 14 | IPM.Schedule.Meeting.Resp.*,``MeetingResponse``,Full,0.35.0,0.35.0,0.35.0 15 | IPM.Schedule.Meeting.Notification.Forward.*,``MeetingForwardNotification``,Full,0.35.0,0.35.0,0.35.0 16 | IPM.Task.*,``Task``,Full,0.33.0,0.35.0,0.35.0 17 | IPM.OLE.CLASS.{00061055-0000-0000-C000-000000000046},``MeetingException``,Full,0.35.0,0.35.0,0.35.0 
-------------------------------------------------------------------------------- /docs/type_support.rst: -------------------------------------------------------------------------------- 1 | Type Support 2 | ============ 3 | 4 | This page lists how much support a certain object has. There are three classifications: Open, meaning that the file can be opened but not saved at all, Partial, meaning that the file uses default saving characteristics and has not been fully implemented, and Full, meaning that the class has been completely written. A class with incomplete properties may still be listed as Full if the saving capabilities are done. In addition, the version for each milestone of a class is listed. 5 | 6 | The first column is the internal class type that is used to figure out the value for the second column, the extract-msg class that is used to handle it. If the Class Name column is blank, look to the next one down for details, as it shares a class. Message class types ending with `.*` must start with the string but may have anything after it. If there is no more specialized version later in the table, anything starting with that will be handled by the specified class. 7 | 8 | For things added before 0.29.0, 0.29.0 is listed as when they were added, as that is the oldest version officially supported. 9 | 10 | Saving before version 0.35.0 may work for some things, but is not considered complete. 11 | 12 | .. 
csv-table:: 13 | :file: type-support.csv 14 | -------------------------------------------------------------------------------- /example-msg-files/expected-outputs/2013-11-18_0026 Test for TIF files/import OleFileIO.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMsgExtractor/msg-extractor/68858f016d010ebf591cbcd7ad5fe7c09b40ea7b/example-msg-files/expected-outputs/2013-11-18_0026 Test for TIF files/import OleFileIO.tif -------------------------------------------------------------------------------- /example-msg-files/expected-outputs/2013-11-18_0026 Test for TIF files/message.text: -------------------------------------------------------------------------------- 1 | From: Brian Zhou 2 | To: brianzhou@me.com 3 | CC: Brian Zhou 4 | Subject: Test for TIF files 5 | Date: Mon, 18 Nov 2013 00:26:24 GMT 6 | ----------------- 7 | 8 | This is a test email to experiment with the MS Outlook MSG Extractor 9 | 10 | 11 | -- 12 | 13 | 14 | Kind regards 15 | 16 | 17 | 18 | 19 | Brian Zhou 20 | 21 | -------------------------------------------------------------------------------- /example-msg-files/expected-outputs/2013-11-18_0026 Test for TIF files/raised value error.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMsgExtractor/msg-extractor/68858f016d010ebf591cbcd7ad5fe7c09b40ea7b/example-msg-files/expected-outputs/2013-11-18_0026 Test for TIF files/raised value error.tif -------------------------------------------------------------------------------- /example-msg-files/expected-outputs/2016-02-23_0657 MSG Test File/message.text: -------------------------------------------------------------------------------- 1 | From: 2 | 3 | To: time2talk@online-convert.com 4 | 5 | CC: 6 | 7 | Subject: MSG Test File 8 | 9 | Date: Tue, 23 Feb 2016 06:57:50 GMT 10 | 11 | ----------------- 12 | 13 | 14 | 15 | MSG test file 16 | 17 | Purpose: Provide 
example of this file type 18 | 19 | Document file type: MSG 20 | 21 | Version: 1.0 22 | 23 | Remark: 24 | 25 | 26 | 27 | Example content: 28 | 29 | The names "John Doe" for males, "Jane Doe" or "Jane Roe" for females, 30 | 31 | or "Jonnie Doe" and "Janie Doe" for children, or just "Doe" 32 | 33 | non-gender-specifically are used as placeholder names for a party whose 34 | 35 | true identity is unknown or must be withheld in a legal action, case, or 36 | 37 | discussion. The names are also used to refer to acorpse or hospital 38 | 39 | patient whose identity is unknown. This practice is widely used in the 40 | 41 | United States and Canada, but is rarely used in other English-speaking 42 | 43 | countries including the United Kingdom itself, from where the use of 44 | 45 | "John Doe" in a legal context originates. The names Joe Bloggs or John 46 | 47 | Smith are used in the UK instead, as well as in Australia and New 48 | 49 | Zealand. 50 | 51 | 52 | 53 | John Doe is sometimes used to refer to a typical male in other contexts 54 | 55 | as well, in a similar manner to John Q. Public, known in Great Britain 56 | 57 | as Joe Public, John Smith or Joe Bloggs. For example, the first name 58 | 59 | listed on a form is often John Doe, along with a fictional address or 60 | 61 | other fictional information to provide an example of how to fill in the 62 | 63 | form. The name is also used frequently in popular culture, for example 64 | 65 | in the Frank Capra film Meet John Doe. John Doe was also the name of a 66 | 67 | 2002 American television series. 68 | 69 | 70 | 71 | Similarly, a child or baby whose identity is unknown may be referred to 72 | 73 | as Baby Doe. A notorious murder case in Kansas City, Missouri, referred 74 | 75 | to the baby victim as Precious Doe. Other unidentified female murder 76 | 77 | victims are Cali Doe and Princess Doe. Additional persons may be called 78 | 79 | James Doe, Judy Doe, etc. 
However, to avoid possible confusion, if two 80 | 81 | anonymous or unknown parties are cited in a specific case or action, the 82 | 83 | surnames Doe and Roe may be used simultaneously; for example, "John Doe 84 | 85 | v. Jane Roe". If several anonymous parties are referenced, they may 86 | 87 | simply be labelled John Doe #1, John Doe #2, etc. (the U.S. Operation 88 | 89 | Delego cited 21 (numbered) "John Doe"s) or labelled with other variants 90 | 91 | of Doe / Roe / Poe / etc. Other early alternatives such as John Stiles 92 | 93 | and Richard Miles are now rarely used, and Mary Major has been used in 94 | 95 | some American federal cases. 96 | 97 | 98 | 99 | File created by http://www.online-convert.com 100 | 101 | 102 | 103 | More example files: http://www.online-convert.com/file-type 104 | 105 | 106 | 107 | Text of Example content: Wikipedia 108 | 109 | 110 | 111 | License: Attribution-ShareAlike 3.0 Unported 112 | 113 | 114 | 115 | 116 | 117 | Feel free to use and share the file according to the license above. 
118 | 119 | -------------------------------------------------------------------------------- /example-msg-files/export-results/strangeDate.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMsgExtractor/msg-extractor/68858f016d010ebf591cbcd7ad5fe7c09b40ea7b/example-msg-files/export-results/strangeDate.msg -------------------------------------------------------------------------------- /example-msg-files/export-results/unicode.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMsgExtractor/msg-extractor/68858f016d010ebf591cbcd7ad5fe7c09b40ea7b/example-msg-files/export-results/unicode.msg -------------------------------------------------------------------------------- /example-msg-files/strangeDate.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMsgExtractor/msg-extractor/68858f016d010ebf591cbcd7ad5fe7c09b40ea7b/example-msg-files/strangeDate.msg -------------------------------------------------------------------------------- /example-msg-files/unicode.msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeamMsgExtractor/msg-extractor/68858f016d010ebf591cbcd7ad5fe7c09b40ea7b/example-msg-files/unicode.msg -------------------------------------------------------------------------------- /extract_msg/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: latin-1 -*- 3 | # Date Format: YYYY-MM-DD 4 | 5 | """ 6 | extract_msg: 7 | Extracts emails and attachments saved in Microsoft Outlook's .msg files. 
8 | 9 | https://github.com/TeamMsgExtractor/msg-extractor 10 | """ 11 | 12 | # --- LICENSE.txt -------------------------------------------------------------- 13 | # 14 | # Copyright 2013-2023 Matthew Walker and Destiny Peterson 15 | # 16 | # This program is free software: you can redistribute it and/or modify 17 | # it under the terms of the GNU General Public License as published by 18 | # the Free Software Foundation, either version 3 of the License, or 19 | # (at your option) any later version. 20 | # 21 | # This program is distributed in the hope that it will be useful, 22 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 23 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 24 | # GNU General Public License for more details. 25 | # 26 | # You should have received a copy of the GNU General Public License 27 | # along with this program. If not, see <https://www.gnu.org/licenses/>. 28 | 29 | __author__ = 'Destiny Peterson & Matthew Walker' 30 | __date__ = '2025-04-10' 31 | __version__ = '0.54.1' 32 | 33 | __all__ = [ 34 | # Modules: 35 | 'attachments', 36 | 'constants', 37 | 'enums', 38 | 'exceptions', 39 | 'msg_classes', 40 | 'null_date', 41 | 'properties', 42 | 'structures', 43 | 44 | # Classes: 45 | 'Attachment', 46 | 'AttachmentBase', 47 | 'Message', 48 | 'MSGFile', 49 | 'Named', 50 | 'NamedProperties', 51 | 'OleWriter', 52 | 'PropertiesStore', 53 | 'Recipient', 54 | 'SignedAttachment', 55 | 56 | # Functions: 57 | 'openMsg', 58 | 'openMsgBulk', 59 | ] 60 | 61 | 62 | # Ensure these are imported before anything else. 63 | from . import constants, enums, exceptions 64 | 65 | from . 
def main(argv: List[str] = sys.argv) -> None:
    """
    Command line entry point.

    Parses the arguments (skipping the program name at index 0) and then, for
    each MSG file listed, either prints its body, saves only its attachments,
    or saves the whole message. A failure on one file is printed together with
    its traceback and does not stop the remaining files from being processed.

    :param argv: The full argument list, program name included.
    """
    args = utils.getCommandArgs(argv[1:])

    # Determine where to save the files to.
    currentDir = os.getcwd() # Store this in case the path changes.
    if args.zip:
        out = args.outPath if args.outPath else ''
    elif args.outPath:
        if not os.path.exists(args.outPath):
            os.makedirs(args.outPath)
        out = args.outPath
    else:
        out = currentDir

    if not args.dumpStdout:
        utils.setupLogging(args.configPath, args.logLevel, args.log, args.fileLogging)

    # Only open a zip file if one was requested.
    _zip = zipfile.ZipFile(args.zip, 'a', zipfile.ZIP_DEFLATED) if args.zip else None

    try:
        # Quickly make a dictionary for the keyword arguments.
        kwargs = {
            'allowFallback': args.allowFallback,
            'attachmentsOnly': args.attachmentsOnly,
            'charset': args.charset,
            'contentId': args.cid,
            'customFilename': args.outName,
            'customPath': out,
            'extractEmbedded': args.extractEmbedded,
            'html': args.html,
            'json': args.json,
            'overwriteExisting': args.overwriteExisting,
            'pdf': args.pdf,
            'preparedHtml': args.preparedHtml,
            'rtf': args.rtf,
            'saveHeader': args.saveHeader,
            'skipBodyNotFound': args.skipBodyNotFound,
            'skipEmbedded': args.skipEmbedded,
            'skipHidden': args.skipHidden,
            'skipNotImplemented': args.skipNotImplemented,
            'useMsgFilename': args.useFilename,
            'wkOptions': args.wkOptions,
            'wkPath': args.wkPath,
            'zip': _zip,
        }

        openKwargs = {
            'errorBehavior': ErrorBehavior.RTFDE if args.ignoreRtfDeErrors else ErrorBehavior.THROW,
        }

        # If we are skipping the NotImplementedError attachments, we need to
        # suppress the error.
        if args.skipNotImplemented:
            openKwargs['errorBehavior'] |= ErrorBehavior.ATTACH_NOT_IMPLEMENTED

        def strSanitize(inp):
            """
            Small function to sanitize parts of a string when failing to print
            them.

            Works on ``repr(inp)`` and replaces every non-ASCII character with
            a ``\\x``, ``\\u`` or ``\\U`` escape, so the result is pure ASCII.
            """
            return ''.join((x if x.isascii() else
                            f'\\x{ord(x):02X}' if ord(x) <= 0xFF else
                            f'\\u{ord(x):04X}' if ord(x) <= 0xFFFF else
                            f'\\U{ord(x):08X}') for x in repr(inp))

        for x in args.msgs:
            if args.progress:
                # This may throw an error sometimes and not other times.
                # Unclear why, so let's just silence it.
                try:
                    print(f'Saving file "{x}"...')
                except UnicodeEncodeError:
                    print(f'Saving file "{strSanitize(x)}" (failed to print without repr)...')
            try:
                with openMsg(x, **openKwargs) as msg:
                    if args.dumpStdout:
                        print(msg.body)
                    elif args.noFolders:
                        msg.saveAttachments(**kwargs)
                    else:
                        msg.save(**kwargs)
            except Exception:
                # Report the failure and move on to the next file.
                try:
                    print(f'Error with file "{x}": {traceback.format_exc()}')
                except UnicodeEncodeError:
                    print(f'Error with file "{strSanitize(x)}": {traceback.format_exc()}')
    finally:
        # Close the zip file if we opened it. This must also happen when the
        # loop is interrupted by something `except Exception` does not catch
        # (e.g. KeyboardInterrupt), because closing is what writes the
        # archive's final records.
        if _zip is not None:
            _zip.close()
class TokenType(enum.Enum):
    """
    The kinds of token that an RTF document is split into.
    """
    GROUP_START = 0
    GROUP_END = 1
    CONTROL = 2
    SYMBOL = 3
    TEXT = 4
    DESTINATION = 5
    IGNORABLE_DESTINATION = 6
    # This one is special, used for handling the binary data.
    BINARY = 7



class Token(NamedTuple):
    # The raw bytes for the token, used to recreate the document.
    raw: bytes
    # The type of the token.
    type: TokenType
    ## The following are optional as they only apply for certain types of tokens.
    # The name of the token, if it is a control or destination.
    name: Optional[bytes] = None
    # The parameter of the token, if it has one. If the token is a `\'hh` token,
    # this will be the decimal equivalent of the hex value.
    parameter: Optional[int] = None



def createDocument(tokens: Iterable[Token]) -> bytes:
    """
    Combines the tokenized data into bytes and returns the document.

    :param tokens: The tokens to recombine, in document order. Any iterable is
        accepted and it is consumed exactly once.

    :returns: The raw bytes of every token joined together, with a single
        space added after each control word or destination.
    """
    # Recombining follows a few very basic rules that are based solely on the
    # token type. Since every token has the raw bytes, this is pretty easy. In
    # fact, control words are the only place where we put a space, as a space
    # anywhere else would be literal, and omitting a space could cause issues on
    # some control words.
    spaced = (TokenType.CONTROL, TokenType.DESTINATION, TokenType.IGNORABLE_DESTINATION)

    # Join once at the end instead of growing a bytes object token by token,
    # which copies the whole document on every append.
    return b''.join(
        token.raw + b' ' if token.type in spaced else token.raw
        for token in tokens
    )
class BrokenAttachment(AttachmentBase):
    """
    Represents an attachment that has suffered a fatal error.

    Will not generate from a NotImplementedError exception.
    """

    def getFilename(self, **_) -> str:
        # Keyword arguments are accepted but never looked at; this always
        # raises.
        raise NotImplementedError('Broken attachments cannot be saved.')

    def save(self, **kwargs) -> constants.SAVE_TYPE:
        """
        Refuses to save, either by raising or by reporting that nothing was
        saved.

        :param skipNotImplemented: If set to a true value, no exception is
            raised and ``(SaveType.NONE, None)`` is returned instead.

        :raises NotImplementedError: :param skipNotImplemented: was missing or
            false.
        """
        skip = kwargs.get('skipNotImplemented', False)
        if not skip:
            raise NotImplementedError('Broken attachments cannot be saved.')

        return (SaveType.NONE, None)

    @property
    def data(self) -> None:
        """
        Always ``None``, as broken attachments have no data.
        """
        return None

    @property
    def type(self) -> AttachmentType:
        return AttachmentType.BROKEN
def registerHandler(handler: Type[CustomAttachmentHandler]) -> None:
    """
    Registers the CustomAttachmentHandler subclass as a handler.

    :param handler: The class (not an instance of it) to register. It is
        appended to the module-level list of known handlers.

    :raises ValueError: The handler was not a class, or was not a subclass of
        CustomAttachmentHandler.
    """
    # Reject non-classes first: issubclass would otherwise raise its own
    # TypeError when handed an instance.
    if not isinstance(handler, type):
        raise ValueError(':param handler: must be a class, not an instance of a class.')
    if not issubclass(handler, CustomAttachmentHandler): # pyright: ignore
        raise ValueError(':param handler: must be a subclass of CustomAttachmentHandler.')
    _knownHandlers.append(handler)
77 | """ 78 | for handler in _knownHandlers: 79 | if handler.isCorrectHandler(attachment): 80 | return handler(attachment) 81 | 82 | raise FeatureNotImplemented(f'No valid handler could be found for the attachment. Contact the developers for help. If the CLSID is not all zeros, include it in the title or message. (CLSID: {attachment.clsid})') 83 | -------------------------------------------------------------------------------- /extract_msg/attachments/custom_att_handler/custom_handler.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | 4 | __all__ = [ 5 | 'CustomAttachmentHandler', 6 | ] 7 | 8 | 9 | import abc 10 | import functools 11 | 12 | from typing import Dict, Optional, TYPE_CHECKING, TypeVar 13 | 14 | from ...constants import MSG_PATH, OVERRIDE_CLASS 15 | from ...structures.odt import ODTStruct 16 | from ...structures.ole_pres import OLEPresentationStream 17 | from ...structures.ole_stream_struct import OleStreamStruct 18 | from ...utils import msgPathToString 19 | 20 | 21 | if TYPE_CHECKING: 22 | from ..attachment_base import AttachmentBase 23 | 24 | _T = TypeVar('_T') 25 | 26 | 27 | class CustomAttachmentHandler(abc.ABC): 28 | """ 29 | A class designed to help with custom attachments that may require parsing in 30 | special ways that are completely different from one another. 31 | """ 32 | 33 | def __init__(self, attachment: AttachmentBase): 34 | super().__init__() 35 | self.__att = attachment 36 | 37 | def getStream(self, filename: MSG_PATH) -> Optional[bytes]: 38 | """ 39 | Gets a stream from the custom data directory. 40 | """ 41 | return self.attachment.getStream('__substg1.0_3701000D/' + msgPathToString(filename)) 42 | 43 | def getStreamAs(self, streamID: MSG_PATH, overrideClass: OVERRIDE_CLASS[_T]) -> Optional[_T]: 44 | """ 45 | Returns the specified stream, modifying it to the specified class if it 46 | is found. 
47 | 48 | :param overrideClass: Class/function to use to morph the data that was 49 | read. The data will be the first argument to the class's __init__ 50 | function or the function itself, if that is what is provided. If 51 | the value is None, this function is not called. If you want it to 52 | be called regardless, you should handle the data directly. 53 | """ 54 | value = self.getStream(streamID) 55 | 56 | if value is not None: 57 | value = overrideClass(value) 58 | 59 | return value 60 | 61 | @classmethod 62 | @abc.abstractmethod 63 | def isCorrectHandler(cls, attachment: AttachmentBase) -> bool: 64 | """ 65 | Checks if this is the correct handler for the attachment. 66 | """ 67 | 68 | @abc.abstractmethod 69 | def generateRtf(self) -> Optional[bytes]: 70 | """ 71 | Generates the RTF to inject in place of the \\objattph tag. 72 | 73 | If this function should do nothing, returns ``None``. 74 | """ 75 | 76 | @property 77 | def attachment(self) -> AttachmentBase: 78 | """ 79 | The attachment this handler is associated with. 80 | """ 81 | return self.__att 82 | 83 | @property 84 | @abc.abstractmethod 85 | def data(self) -> Optional[bytes]: 86 | """ 87 | Gets the data for the attachment. 88 | 89 | If an attachment should do nothing when saving, returns ``None``. 90 | """ 91 | 92 | @property 93 | @abc.abstractmethod 94 | def name(self) -> Optional[str]: 95 | """ 96 | Returns the name to be used when saving the attachment. 97 | """ 98 | 99 | @property 100 | @abc.abstractmethod 101 | def obj(self) -> Optional[object]: 102 | """ 103 | Returns an object representing the data. 104 | 105 | May return the same value as :property data:. 106 | 107 | If there is no object to represent the custom attachment, including 108 | ``bytes``, returns ``None``. 109 | """ 110 | 111 | @functools.cached_property 112 | def objInfo(self) -> Optional[ODTStruct]: 113 | """ 114 | The structure representing the stream "\\x03ObjInfo", if it exists. 
class LinkedObjectAttachment(CustomAttachmentHandler):
    """
    A link to an Outlook object.

    Not *positive* I understand what this attachment type is, but this seems to
    be the most likely name. Contains presentation data about how to render it
    as well as properties with data that link to it. It looks *similar* to what
    the documentation for Journal specifies would be its custom attachment
    type, however some small details don't perfectly add up.

    I've also only seen this on Journal objects thus far.
    """

    def __init__(self, attachment: AttachmentBase):
        """
        :param attachment: The attachment this handler is being created for.

        :raises ValueError: The MailStream stream was missing or empty, or was
            not exactly 12 bytes long.
        """
        super().__init__(attachment)
        # The stream is only validated here; its contents are not kept.
        stream = attachment.getStream('__substg1.0_3701000D/\x03MailStream')
        if not stream:
            raise ValueError('MailStream could not be found.')
        if len(stream) != 12:
            raise ValueError('MailStream is the wrong length.')

    @classmethod
    def isCorrectHandler(cls, attachment: AttachmentBase) -> bool:
        """
        Checks if the CLSID of the attachment is the one this handler is for.
        """
        return attachment.clsid == '00020D09-0000-0000-C000-000000000046'

    def generateRtf(self) -> Optional[bytes]:
        # TODO
        return None

    @property
    def data(self) -> None:
        # This type of attachment has no direct associated data.
        return None

    @cached_property
    def mailMsgAttFld(self) -> Optional[EntryID]:
        """
        The EntryID of the folder of the linked Message object.
        """
        return EntryID.autoCreate(self.getStream('MailMsgAttFld'))

    @cached_property
    def mailMsgAttMdb(self) -> Optional[EntryID]:
        """
        The EntryID of the store of the linked Message object.
        """
        return EntryID.autoCreate(self.getStream('MailMsgAttMdb'))

    @cached_property
    def mailMsgAttMsg(self) -> Optional[EntryID]:
        """
        The EntryID of the linked Message object, required only if the
        mailMsgAttSrchKey property is None.
        """
        return EntryID.autoCreate(self.getStream('MailMsgAttMsg'))

    @cached_property
    def mailMsgAttSrchFld(self) -> Optional[EntryID]:
        """
        The object EntryID of the Sent Items special folder of the linked
        Message object.
        """
        return EntryID.autoCreate(self.getStream('MailMsgAttSrchFld'))

    @cached_property
    def mailMsgAttSrchKey(self) -> Optional[bytes]:
        """
        The search key for the linked message object, required only if
        mailMsgAttMsg is None.
        """
        return self.getStream('MailMsgAttSrchKey')

    @property
    def name(self) -> None:
        # Doesn't save.
        return None

    @property
    def obj(self) -> None:
        # No object to represent this.
        return None



registerHandler(LinkedObjectAttachment)
49 | """ 50 | customFilename = kwargs.get('customFilename') 51 | if customFilename: 52 | customFilename = str(customFilename) 53 | # First we need to validate it. If there are invalid characters, 54 | # this will detect it. 55 | if constants.re.INVALID_FILENAME_CHARS.search(customFilename): 56 | raise ValueError('Invalid character found in customFilename. Must not contain any of the following characters: \\/:*?"<>|') 57 | return customFilename 58 | else: 59 | return self.name 60 | 61 | def save(self, **kwargs) -> constants.SAVE_TYPE: 62 | # First check if we are skipping embedded messages and stop 63 | # *immediately* if we are. 64 | if kwargs.get('skipEmbedded'): 65 | return (SaveType.NONE, None) 66 | 67 | # We only need to handle things if we are saving as bytes. 68 | if kwargs.get('extractEmbedded', False): 69 | # Get the filename to use. 70 | filename = self.getFilename(**kwargs) 71 | 72 | # Someone managed to have a null character here, so let's get rid of 73 | # that 74 | filename = prepareFilename(filename) 75 | 76 | # Get the maximum name length. 77 | maxNameLength = kwargs.get('maxNameLength', 256) 78 | 79 | # Make sure the filename is not longer than it should be. 80 | if len(filename) > maxNameLength: 81 | name, ext = os.path.splitext(filename) 82 | filename = name[:maxNameLength - len(ext)] + ext 83 | 84 | # Check if we are doing a zip file. 85 | _zip = kwargs.get('zip') 86 | 87 | createdZip = False 88 | try: 89 | # ZipFile handling. 90 | if _zip: 91 | # If we are doing a zip file, first check that we have been given a path. 92 | if isinstance(_zip, (str, pathlib.Path)): 93 | # If we have a path then we use the zip file. 94 | _zip = zipfile.ZipFile(_zip, 'a', zipfile.ZIP_DEFLATED) 95 | kwargs['zip'] = _zip 96 | createdZip = True 97 | # Path needs to be done in a special way if we are in a zip file. 98 | customPath = pathlib.Path(kwargs.get('customPath', '')) 99 | # Set the open command to be that of the zip file. 
100 | _open = createZipOpen(_zip.open) 101 | # Zip files use w for writing in binary. 102 | mode = 'w' 103 | else: 104 | customPath = pathlib.Path(kwargs.get('customPath', '.')).absolute() 105 | mode = 'wb' 106 | _open = open 107 | 108 | fullFilename = self._handleFnc(_zip, filename, customPath, kwargs) 109 | 110 | with _open(str(fullFilename), mode) as f: 111 | self.data.export(f) 112 | 113 | return (SaveType.FILE, str(fullFilename)) 114 | finally: 115 | # Close the ZipFile if this function created it. 116 | if _zip and createdZip: 117 | _zip.close() 118 | else: 119 | # If we are letting the MSG file create stuff, just let it handle 120 | # everything. 121 | return self.data.save(**kwargs) 122 | 123 | save.__doc__ = _saveDoc 124 | 125 | @property 126 | def data(self) -> MSGFile: 127 | """ 128 | Returns the attachment data. 129 | """ 130 | return self.__data 131 | 132 | @property 133 | def type(self) -> AttachmentType: 134 | return AttachmentType.MSG -------------------------------------------------------------------------------- /extract_msg/attachments/unsupported_att.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | 'UnsupportedAttachment', 3 | ] 4 | 5 | 6 | from .. import constants 7 | from .attachment_base import AttachmentBase 8 | from ..enums import AttachmentType, SaveType 9 | 10 | 11 | class UnsupportedAttachment(AttachmentBase): 12 | """ 13 | An attachment whose type is not currently supported. 14 | """ 15 | 16 | def getFilename(self, **_) -> str: 17 | raise NotImplementedError('Unsupported attachments cannot be saved.') 18 | 19 | def save(self, **kwargs) -> constants.SAVE_TYPE: 20 | """ 21 | Raises a NotImplementedError unless :param skipNotImplemented: is set to 22 | ``True``. 23 | 24 | If it is, returns a value that indicates no data was saved. 
class WebAttachment(AttachmentBase):
    """
    An attachment that exists on the internet and not attached to the MSG file
    directly.
    """

    def getFilename(self, **_) -> str:
        """
        Getting the filename of a web attachment is not supported.

        Keyword arguments are accepted and ignored so that this method can be
        called the same way as ``getFilename`` on the other attachment classes,
        and fails with the intended exception rather than a TypeError.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError('Cannot get the filename of a web attachment.')

    def save(self, **kwargs) -> constants.SAVE_TYPE:
        """
        Raises a NotImplementedError unless :param skipNotImplemented: is set to
        True.

        If it is, returns a value that indicates no data was saved.
        """
        if kwargs.get('skipNotImplemented', False):
            return (SaveType.NONE, None)

        raise NotImplementedError('Web attachments cannot be saved.')

    @property
    def data(self) -> None:
        """
        Getting the data of a web attachment is not supported.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError('Cannot get the data of a web attachment.')

    @functools.cached_property
    def originalPermissionType(self) -> Optional[AttachmentPermissionType]:
        """
        The original permission type data associated with a web reference
        attachment.
        """
        return self.getNamedAs('AttachmentOriginalPermissionType', constants.ps.PSETID_ATTACHMENT, AttachmentPermissionType)

    @functools.cached_property
    def permissionType(self) -> Optional[AttachmentPermissionType]:
        """
        The permission type data associated with a web reference attachment.
        """
        return self.getNamedAs('AttachmentPermissionType', constants.ps.PSETID_ATTACHMENT, AttachmentPermissionType)

    @functools.cached_property
    def providerName(self) -> Optional[str]:
        """
        The type of web service manipulating the attachment.
        """
        return self.getNamedProp('AttachmentProviderType', constants.ps.PSETID_ATTACHMENT)

    @property
    def type(self) -> AttachmentType:
        return AttachmentType.WEB

    @property
    def url(self) -> Optional[str]:
        """
        The url for the web attachment. If this is not set, that is probably an
        error.
        """
        return self.longPathname
__all__ = [
    'PSETID_ADDRESS',
    'PSETID_AIRSYNC',
    'PSETID_APPOINTMENT',
    'PSETID_ATTACHMENT',
    'PSETID_CALENDAR_ASSISTANT',
    'PSETID_COMMON',
    'PSETID_LOG',
    'PSETID_MEETING',
    'PSETID_MESSAGING',
    'PSETID_NOTE',
    'PSETID_POSTRSS',
    'PSETID_SHARING',
    'PSETID_TASK',
    'PSETID_UNIFIEDMESSAGING',
    'PSETID_XMLEXTRACTEDENTITIES',
    'PS_INTERNET_HEADERS',
    'PS_MAPI',
    'PS_PUBLIC_STRINGS',
]

from typing import Final


# Each constant is the GUID of a property set, written as a braced, upper-case
# string. They are listed in the same order as ``__all__``.

# PSETID_* identifiers.
PSETID_ADDRESS: Final[str] = '{00062004-0000-0000-C000-000000000046}'
PSETID_AIRSYNC: Final[str] = '{71035549-0739-4DCB-9163-00F0580DBBDF}'
PSETID_APPOINTMENT: Final[str] = '{00062002-0000-0000-C000-000000000046}'
PSETID_ATTACHMENT: Final[str] = '{96357F7F-59E1-47D0-99A7-46515C183B54}'
PSETID_CALENDAR_ASSISTANT: Final[str] = '{11000E07-B51B-40D6-AF21-CAA85EDAB1D0}'
PSETID_COMMON: Final[str] = '{00062008-0000-0000-C000-000000000046}'
PSETID_LOG: Final[str] = '{0006200A-0000-0000-C000-000000000046}'
PSETID_MEETING: Final[str] = '{6ED8DA90-450B-101B-98DA-00AA003F1305}'
PSETID_MESSAGING: Final[str] = '{41F28F13-83F4-4114-A584-EEDB5A6B0BFF}'
PSETID_NOTE: Final[str] = '{0006200E-0000-0000-C000-000000000046}'
PSETID_POSTRSS: Final[str] = '{00062041-0000-0000-C000-000000000046}'
PSETID_SHARING: Final[str] = '{00062040-0000-0000-C000-000000000046}'
PSETID_TASK: Final[str] = '{00062003-0000-0000-C000-000000000046}'
PSETID_UNIFIEDMESSAGING: Final[str] = '{4442858E-A9E3-4E80-B900-317A210CC15B}'
PSETID_XMLEXTRACTEDENTITIES: Final[str] = '{23239608-685D-4732-9C55-4C95CB4E8E33}'

# PS_* identifiers.
PS_INTERNET_HEADERS: Final[str] = '{00020386-0000-0000-C000-000000000046}'
PS_MAPI: Final[str] = '{00020328-0000-0000-C000-000000000046}'
PS_PUBLIC_STRINGS: Final[str] = '{00020329-0000-0000-C000-000000000046}'
"""
Regular expression constants.
"""


__all__ = [
    'HTML_BODY_START',
    'HTML_SAN_SPACE',
    'INVALID_FILENAME_CHARS',
    'INVALID_OLE_PATH',
    'RTF_BODY_STRIP_INIT',
    'RTF_BODY_STRIP_PRE_CLOSE',
    'RTF_BODY_STRIP_PRE_OPEN',
    'RTF_ENC_BODY_START',
]


import re

from typing import Final


# Allow better typing in versions above 3.8.
import sys
if sys.version_info >= (3, 9):
    _RE_STR_TYPE = re.Pattern[str]
    _RE_BYTES_TYPE = re.Pattern[bytes]
else:
    _RE_STR_TYPE = re.Pattern
    _RE_BYTES_TYPE = re.Pattern


# Characters that are invalid in a filename.
INVALID_FILENAME_CHARS: Final[_RE_STR_TYPE] = re.compile(r'[\\/:*?"<>|]')
# Regular expression to find sections of spaces for htmlSanitize.
HTML_SAN_SPACE: Final[_RE_STR_TYPE] = re.compile(' +')
# Regular expression to find the start of the html body. Matches the complete
# opening tag, including any attributes, up to the closing '>'.
HTML_BODY_START: Final[_RE_BYTES_TYPE] = re.compile(b'<body[^>]*>')
# Regular expression to find the start of the html body in encapsulated RTF.
# This is used for one of the pattern types that makes life easy. It matches an
# htmltag group whose entire content is the opening body tag.
RTF_ENC_BODY_START: Final[_RE_BYTES_TYPE] = re.compile(br'\{\\\*\\htmltag[0-9]* ?<body[^>]*>\}')
# Used in the validation of OLE paths. Any of these characters in a name make it
# invalid.
INVALID_OLE_PATH: Final[_RE_STR_TYPE] = re.compile(r'[:/\\!]')

# Used as the initial step in stripping RTF files for deencapsulation. Finds
# ignored sections that do not contain groups *and* finds HTML tag sections
# that are entirely empty. It also then finds sections of data that can be
# merged together without affecting the results
RTF_BODY_STRIP_INIT: Final[_RE_BYTES_TYPE] = re.compile(rb'(\\htmlrtf[^0{}][^{}]*?\\htmlrtf0 ?)|(\{\\\*\\htmltag[0-9]+\})|(\\htmlrtf0 ?\\htmlrtf1? ?)|(\\htmlrtf1? ?\{\}\\htmlrtf0 ?)|(\\htmlrtf1? ?\\\'[a-fA-F0-9]{2}\\htmlrtf0 ?)')

# Preprocessing steps to simplify the RTF.
RTF_BODY_STRIP_PRE_CLOSE: Final[_RE_BYTES_TYPE] = re.compile(rb'(\\htmlrtf1? ?}\\htmlrtf0 ?)|(\\htmlrtf1? ?[^0{}][^{}]*?} ?\\htmlrtf0 ?)')
RTF_BODY_STRIP_PRE_OPEN: Final[_RE_BYTES_TYPE] = re.compile(rb'\\htmlrtf1? ?{[^{}]*?\\htmlrtf0 ?')
56 | ST_PROPSTORE_HEADER: Final[struct.Struct] = struct.Struct('<8x4I') 57 | ST_PROP_BASE: Final[struct.Struct] = struct.Struct('<2HI') 58 | # Struct used for unpacking a system time. 59 | ST_SYSTEMTIME: Final[struct.Struct] = struct.Struct('<8H') 60 | # Struct used for unpacking a GUID from bytes. 61 | ST_GUID: Final[struct.Struct] = struct.Struct('b') 89 | ST_BE_I16: Final[struct.Struct] = struct.Struct('>h') 90 | ST_BE_I32: Final[struct.Struct] = struct.Struct('>i') 91 | ST_BE_I64: Final[struct.Struct] = struct.Struct('>q') 92 | ST_BE_UI8: Final[struct.Struct] = struct.Struct('>B') 93 | ST_BE_UI16: Final[struct.Struct] = struct.Struct('>H') 94 | ST_BE_UI32: Final[struct.Struct] = struct.Struct('>I') 95 | ST_BE_UI64: Final[struct.Struct] = struct.Struct('>Q') 96 | ST_BE_F32: Final[struct.Struct] = struct.Struct('>f') 97 | ST_BE_F64: Final[struct.Struct] = struct.Struct('>d') 98 | # Structs that use the system byte order, where consistency on a single system 99 | # is all that matters. Mainly used for quick casts between signed and unsigned. 100 | ST_SBO_I8: Final[struct.Struct] = struct.Struct('@b') 101 | ST_SBO_I16: Final[struct.Struct] = struct.Struct('@h') 102 | ST_SBO_I32: Final[struct.Struct] = struct.Struct('@i') 103 | ST_SBO_I64: Final[struct.Struct] = struct.Struct('@q') 104 | ST_SBO_UI8: Final[struct.Struct] = struct.Struct('@B') 105 | ST_SBO_UI16: Final[struct.Struct] = struct.Struct('@H') 106 | ST_SBO_UI32: Final[struct.Struct] = struct.Struct('@I') 107 | ST_SBO_UI64: Final[struct.Struct] = struct.Struct('@Q') 108 | # Struct for an RGB value that, in little endian, would be an int written in hex 109 | # as 0x00BBGGRR. 
110 | ST_RGB: Final[struct.Struct] = struct.Struct('',63:'?',64:'@',65:'A',66:'B',67:'C',68:'D',69:'E',70:'F',71:'G',72:'H',73:'I',74:'J',75:'K',76:'L',77:'M',78:'N',79:'O',80:'P',81:'Q',82:'R',83:'S',84:'T',85:'U',86:'V',87:'W',88:'X',89:'Y',90:'Z',91:'[',92:'\\',93:']',94:'^',95:'_',96:'`',97:'a',98:'b',99:'c',100:'d',101:'e',102:'f',103:'g',104:'h',105:'i',106:'j',107:'k',108:'l',109:'m',110:'n',111:'o',112:'p',113:'q',114:'r',115:'s',116:'t',117:'u',118:'v',119:'w',120:'x',121:'y',122:'z',123:'{',124:'|',125:'}',126:'~',127:'\x7f',128:'\xC4',129:'\u0100',130:'\u0101',131:'\xC9',132:'\u0104',133:'\xD6',134:'\xDC',135:'\xE1',136:'\u0105',137:'\u010C',138:'\xE4',139:'\u010D',140:'\u0106',141:'\u0107',142:'\xE9',143:'\u0179',144:'\u017A',145:'\u010E',146:'\xED',147:'\u010F',148:'\u0112',149:'\u0113',150:'\u0116',151:'\xF3',152:'\u0117',153:'\xF4',154:'\xF6',155:'\xF5',156:'\xFA',157:'\u011A',158:'\u011B',159:'\xFC',160:'\u2020',161:'\xB0',162:'\u0118',163:'\xA3',164:'\xA7',165:'\u2022',166:'\xB6',167:'\xDF',168:'\xAE',169:'\xA9',170:'\u2122',171:'\u0119',172:'\xA8',173:'\u2260',174:'\u0123',175:'\u012E',176:'\u012F',177:'\u012A',178:'\u2264',179:'\u2265',180:'\u012B',181:'\u0136',182:'\u2202',183:'\u2211',184:'\u0142',185:'\u013B',186:'\u013C',187:'\u013D',188:'\u013E',189:'\u0139',190:'\u013A',191:'\u0145',192:'\u0146',193:'\u0143',194:'\xAC',195:'\u221A',196:'\u0144',197:'\u0147',198:'\u2206',199:'\xAB',200:'\xBB',201:'\u2026',202:'\xA0',203:'\u0148',204:'\u0150',205:'\xD5',206:'\u0151',207:'\u014C',208:'\u2013',209:'\u2014',210:'\u201C',211:'\u201D',212:'\u2018',213:'\u2019',214:'\xF7',215:'\u25CA',216:'\u014D',217:'\u0154',218:'\u0155',219:'\u0158',220:'\u2039',221:'\u203A',222:'\u0159',223:'\u0156',224:'\u0157',225:'\u0160',226:'\u201A',227:'\u201E',228:'\u0161',229:'\u015A',230:'\u015B',231:'\xC1',232:'\u0164',233:'\u0165',234:'\xCD',235:'\u017D',236:'\u017E',237:'\u016A',238:'\xD3',239:'\xD4',240:'\u016B',241:'\u016E',242:'\xDA',243:'\u016F',244:'\u0170',245:
'\u0171',246:'\u0172',247:'\u0173',248:'\xDD',249:'\xFD',250:'\u0137',251:'\u017B',252:'\u0141',253:'\u017C',254:'\u0122',255:'\u02C7'} -------------------------------------------------------------------------------- /extract_msg/encoding/_dt/_mac_cyrillic.py: -------------------------------------------------------------------------------- 1 | # Based on https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT 2 | 3 | __all__ = [ 4 | 'decodingTable', 5 | ] 6 | 7 | 8 | decodingTable={0:'\x00',1:'\x01',2:'\x02',3:'\x03',4:'\x04',5:'\x05',6:'\x06',7:'\x07',8:'\x08',9:'\t',10:'\n',11:'\x0b',12:'\x0c',13:'\r',14:'\x0e',15:'\x0f',16:'\x10',17:'\x11',18:'\x12',19:'\x13',20:'\x14',21:'\x15',22:'\x16',23:'\x17',24:'\x18',25:'\x19',26:'\x1a',27:'\x1b',28:'\x1c',29:'\x1d',30:'\x1e',31:'\x1f',32:' ',33:'!',34:'"',35:'#',36:'$',37:'%',38:'&',39:"'",40:'(',41:')',42:'*',43:'+',44:',',45:'-',46:'.',47:'/',48:'0',49:'1',50:'2',51:'3',52:'4',53:'5',54:'6',55:'7',56:'8',57:'9',58:':',59:';',60:'<',61:'=',62:'>',63:'?',64:'@',65:'A',66:'B',67:'C',68:'D',69:'E',70:'F',71:'G',72:'H',73:'I',74:'J',75:'K',76:'L',77:'M',78:'N',79:'O',80:'P',81:'Q',82:'R',83:'S',84:'T',85:'U',86:'V',87:'W',88:'X',89:'Y',90:'Z',91:'[',92:'\\',93:']',94:'^',95:'_',96:'`',97:'a',98:'b',99:'c',100:'d',101:'e',102:'f',103:'g',104:'h',105:'i',106:'j',107:'k',108:'l',109:'m',110:'n',111:'o',112:'p',113:'q',114:'r',115:'s',116:'t',117:'u',118:'v',119:'w',120:'x',121:'y',122:'z',123:'{',124:'|',125:'}',126:'~',127:'\x7f',128:'\u0410',129:'\u0411',130:'\u0412',131:'\u0413',132:'\u0414',133:'\u0415',134:'\u0416',135:'\u0417',136:'\u0418',137:'\u0419',138:'\u041A',139:'\u041B',140:'\u041C',141:'\u041D',142:'\u041E',143:'\u041F',144:'\u0420',145:'\u0421',146:'\u0422',147:'\u0423',148:'\u0424',149:'\u0425',150:'\u0426',151:'\u0427',152:'\u0428',153:'\u0429',154:'\u042A',155:'\u042B',156:'\u042C',157:'\u042D',158:'\u042E',159:'\u042F',160:'\u2020',161:'\xB0',162:'\xA2',163:'\xA3',164:'\xA7',165:'\u2022',166
:'\xB6',167:'\u0406',168:'\xAE',169:'\xA9',170:'\u2122',171:'\u0402',172:'\u0452',173:'\u2260',174:'\u0403',175:'\u0453',176:'\u221E',177:'\xB1',178:'\u2264',179:'\u2265',180:'\u0456',181:'\xB5',182:'\u2202',183:'\u0408',184:'\u0404',185:'\u0454',186:'\u0407',187:'\u0457',188:'\u0409',189:'\u0459',190:'\u040A',191:'\u045A',192:'\u0458',193:'\u0405',194:'\xAC',195:'\u221A',196:'\u0192',197:'\u2248',198:'\u2206',199:'\xAB',200:'\xBB',201:'\u2026',202:'\xA0',203:'\u040B',204:'\u045B',205:'\u040C',206:'\u045C',207:'\u0455',208:'\u2013',209:'\u2014',210:'\u201C',211:'\u201D',212:'\u2018',213:'\u2019',214:'\xF7',215:'\u201E',216:'\u040E',217:'\u045E',218:'\u040F',219:'\u045F',220:'\u2116',221:'\u0401',222:'\u0451',223:'\u044F',224:'\u0430',225:'\u0431',226:'\u0432',227:'\u0433',228:'\u0434',229:'\u0435',230:'\u0436',231:'\u0437',232:'\u0438',233:'\u0439',234:'\u043A',235:'\u043B',236:'\u043C',237:'\u043D',238:'\u043E',239:'\u043F',240:'\u0440',241:'\u0441',242:'\u0442',243:'\u0443',244:'\u0444',245:'\u0445',246:'\u0446',247:'\u0447',248:'\u0448',249:'\u0449',250:'\u044A',251:'\u044B',252:'\u044C',253:'\u044D',254:'\u044E',255:'\xA4'} -------------------------------------------------------------------------------- /extract_msg/encoding/_dt/_mac_greek.py: -------------------------------------------------------------------------------- 1 | # Based on https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT 2 | 3 | __all__ = [ 4 | 'decodingTable', 5 | ] 6 | 7 | 8 | decodingTable={0:'\x00',1:'\x01',2:'\x02',3:'\x03',4:'\x04',5:'\x05',6:'\x06',7:'\x07',8:'\x08',9:'\t',10:'\n',11:'\x0b',12:'\x0c',13:'\r',14:'\x0e',15:'\x0f',16:'\x10',17:'\x11',18:'\x12',19:'\x13',20:'\x14',21:'\x15',22:'\x16',23:'\x17',24:'\x18',25:'\x19',26:'\x1a',27:'\x1b',28:'\x1c',29:'\x1d',30:'\x1e',31:'\x1f',32:' 
',33:'!',34:'"',35:'#',36:'$',37:'%',38:'&',39:"'",40:'(',41:')',42:'*',43:'+',44:',',45:'-',46:'.',47:'/',48:'0',49:'1',50:'2',51:'3',52:'4',53:'5',54:'6',55:'7',56:'8',57:'9',58:':',59:';',60:'<',61:'=',62:'>',63:'?',64:'@',65:'A',66:'B',67:'C',68:'D',69:'E',70:'F',71:'G',72:'H',73:'I',74:'J',75:'K',76:'L',77:'M',78:'N',79:'O',80:'P',81:'Q',82:'R',83:'S',84:'T',85:'U',86:'V',87:'W',88:'X',89:'Y',90:'Z',91:'[',92:'\\',93:']',94:'^',95:'_',96:'`',97:'a',98:'b',99:'c',100:'d',101:'e',102:'f',103:'g',104:'h',105:'i',106:'j',107:'k',108:'l',109:'m',110:'n',111:'o',112:'p',113:'q',114:'r',115:'s',116:'t',117:'u',118:'v',119:'w',120:'x',121:'y',122:'z',123:'{',124:'|',125:'}',126:'~',127:'\x7f',128:'\xC4',129:'\xB9',130:'\xB2',131:'\xC9',132:'\xB3',133:'\xD6',134:'\xDC',135:'\u0385',136:'\xE0',137:'\xE2',138:'\xE4',139:'\u0384',140:'\xA8',141:'\xE7',142:'\xE9',143:'\xE8',144:'\xEA',145:'\xEB',146:'\xA3',147:'\u2122',148:'\xEE',149:'\xEF',150:'\u2022',151:'\xBD',152:'\u2030',153:'\xF4',154:'\xF6',155:'\xA6',156:'\xAD',157:'\xF9',158:'\xFB',159:'\xFC',160:'\u2020',161:'\u0393',162:'\u0394',163:'\u0398',164:'\u039B',165:'\u039E',166:'\u03A0',167:'\xDF',168:'\xAE',169:'\xA9',170:'\u03A3',171:'\u03AA',172:'\xA7',173:'\u2260',174:'\xB0',175:'\u0387',176:'\u0391',177:'\xB1',178:'\u2264',179:'\u2265',180:'\xA5',181:'\u0392',182:'\u0395',183:'\u0396',184:'\u0397',185:'\u0399',186:'\u039A',187:'\u039C',188:'\u03A6',189:'\u03AB',190:'\u03A8',191:'\u03A9',192:'\u03AC',193:'\u039D',194:'\xAC',195:'\u039F',196:'\u03A1',197:'\u2248',198:'\u03A4',199:'\xAB',200:'\xBB',201:'\u2026',202:'\xA0',203:'\u03A5',204:'\u03A7',205:'\u0386',206:'\u0388',207:'\u0153',208:'\u2013',209:'\u2015',210:'\u201C',211:'\u201D',212:'\u2018',213:'\u2019',214:'\xF7',215:'\u0389',216:'\u038A',217:'\u038C',218:'\u038E',219:'\u03AD',220:'\u03AE',221:'\u03AF',222:'\u03CC',223:'\u038F',224:'\u03CD',225:'\u03B1',226:'\u03B2',227:'\u03C8',228:'\u03B4',229:'\u03B5',230:'\u03C6',231:'\u03B3',232:'\u03B7',233:'\u03B9',2
34:'\u03BE',235:'\u03BA',236:'\u03BB',237:'\u03BC',238:'\u03BD',239:'\u03BF',240:'\u03C0',241:'\u03CE',242:'\u03C1',243:'\u03C3',244:'\u03C4',245:'\u03B8',246:'\u03C9',247:'\u03C2',248:'\u03C7',249:'\u03C5',250:'\u03B6',251:'\u03CA',252:'\u03CB',253:'\u0390',254:'\u03B0'} -------------------------------------------------------------------------------- /extract_msg/encoding/_dt/_mac_iceland.py: -------------------------------------------------------------------------------- 1 | # Based on https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT 2 | 3 | __all__ = [ 4 | 'decodingTable', 5 | ] 6 | 7 | 8 | decodingTable={0:'\x00',1:'\x01',2:'\x02',3:'\x03',4:'\x04',5:'\x05',6:'\x06',7:'\x07',8:'\x08',9:'\t',10:'\n',11:'\x0b',12:'\x0c',13:'\r',14:'\x0e',15:'\x0f',16:'\x10',17:'\x11',18:'\x12',19:'\x13',20:'\x14',21:'\x15',22:'\x16',23:'\x17',24:'\x18',25:'\x19',26:'\x1a',27:'\x1b',28:'\x1c',29:'\x1d',30:'\x1e',31:'\x1f',32:' ',33:'!',34:'"',35:'#',36:'$',37:'%',38:'&',39:"'",40:'(',41:')',42:'*',43:'+',44:',',45:'-',46:'.',47:'/',48:'0',49:'1',50:'2',51:'3',52:'4',53:'5',54:'6',55:'7',56:'8',57:'9',58:':',59:';',60:'<',61:'=',62:'>',63:'?',64:'@',65:'A',66:'B',67:'C',68:'D',69:'E',70:'F',71:'G',72:'H',73:'I',74:'J',75:'K',76:'L',77:'M',78:'N',79:'O',80:'P',81:'Q',82:'R',83:'S',84:'T',85:'U',86:'V',87:'W',88:'X',89:'Y',90:'Z',91:'[',92:'\\',93:']',94:'^',95:'_',96:'`',97:'a',98:'b',99:'c',100:'d',101:'e',102:'f',103:'g',104:'h',105:'i',106:'j',107:'k',108:'l',109:'m',110:'n',111:'o',112:'p',113:'q',114:'r',115:'s',116:'t',117:'u',118:'v',119:'w',120:'x',121:'y',122:'z',123:'{',124:'|',125:'}',126:'~',127:'\x7f',128:'\xC4',129:'\xC5',130:'\xC7',131:'\xC9',132:'\xD1',133:'\xD6',134:'\xDC',135:'\xE1',136:'\xE0',137:'\xE2',138:'\xE4',139:'\xE3',140:'\xE5',141:'\xE7',142:'\xE9',143:'\xE8',144:'\xEA',145:'\xEB',146:'\xED',147:'\xEC',148:'\xEE',149:'\xEF',150:'\xF1',151:'\xF3',152:'\xF2',153:'\xF4',154:'\xF6',155:'\xF5',156:'\xFA',157:'\xF9',158:'\xFB',159:'\xFC',16
0:'\xDD',161:'\xB0',162:'\xA2',163:'\xA3',164:'\xA7',165:'\u2022',166:'\xB6',167:'\xDF',168:'\xAE',169:'\xA9',170:'\u2122',171:'\xB4',172:'\xA8',173:'\u2260',174:'\xC6',175:'\xD8',176:'\u221E',177:'\xB1',178:'\u2264',179:'\u2265',180:'\xA5',181:'\xB5',182:'\u2202',183:'\u2211',184:'\u220F',185:'\u03C0',186:'\u222B',187:'\xAA',188:'\xBA',189:'\u2126',190:'\xE6',191:'\xF8',192:'\xBF',193:'\xA1',194:'\xAC',195:'\u221A',196:'\u0192',197:'\u2248',198:'\u2206',199:'\xAB',200:'\xBB',201:'\u2026',202:'\xA0',203:'\xC0',204:'\xC3',205:'\xD5',206:'\u0152',207:'\u0153',208:'\u2013',209:'\u2014',210:'\u201C',211:'\u201D',212:'\u2018',213:'\u2019',214:'\xF7',215:'\u25CA',216:'\xFF',217:'\u0178',218:'\u2044',219:'\xA4',220:'\xD0',221:'\xF0',222:'\xDE',223:'\xFE',224:'\xFD',225:'\xB7',226:'\u201A',227:'\u201E',228:'\u2030',229:'\xC2',230:'\xCA',231:'\xC1',232:'\xCB',233:'\xC8',234:'\xCD',235:'\xCE',236:'\xCF',237:'\xCC',238:'\xD3',239:'\xD4',241:'\xD2',242:'\xDA',243:'\xDB',244:'\xD9',245:'\u0131',246:'\u02C6',247:'\u02DC',248:'\xAF',249:'\u02D8',250:'\u02D9',251:'\u02DA',252:'\xB8',253:'\u02DD',254:'\u02DB',255:'\u02C7'} -------------------------------------------------------------------------------- /extract_msg/encoding/_dt/_mac_turkish.py: -------------------------------------------------------------------------------- 1 | # Based on https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT 2 | 3 | __all__ = [ 4 | 'decodingTable', 5 | ] 6 | 7 | 8 | decodingTable={0:'\x00',1:'\x01',2:'\x02',3:'\x03',4:'\x04',5:'\x05',6:'\x06',7:'\x07',8:'\x08',9:'\t',10:'\n',11:'\x0b',12:'\x0c',13:'\r',14:'\x0e',15:'\x0f',16:'\x10',17:'\x11',18:'\x12',19:'\x13',20:'\x14',21:'\x15',22:'\x16',23:'\x17',24:'\x18',25:'\x19',26:'\x1a',27:'\x1b',28:'\x1c',29:'\x1d',30:'\x1e',31:'\x1f',32:' 
',33:'!',34:'"',35:'#',36:'$',37:'%',38:'&',39:"'",40:'(',41:')',42:'*',43:'+',44:',',45:'-',46:'.',47:'/',48:'0',49:'1',50:'2',51:'3',52:'4',53:'5',54:'6',55:'7',56:'8',57:'9',58:':',59:';',60:'<',61:'=',62:'>',63:'?',64:'@',65:'A',66:'B',67:'C',68:'D',69:'E',70:'F',71:'G',72:'H',73:'I',74:'J',75:'K',76:'L',77:'M',78:'N',79:'O',80:'P',81:'Q',82:'R',83:'S',84:'T',85:'U',86:'V',87:'W',88:'X',89:'Y',90:'Z',91:'[',92:'\\',93:']',94:'^',95:'_',96:'`',97:'a',98:'b',99:'c',100:'d',101:'e',102:'f',103:'g',104:'h',105:'i',106:'j',107:'k',108:'l',109:'m',110:'n',111:'o',112:'p',113:'q',114:'r',115:'s',116:'t',117:'u',118:'v',119:'w',120:'x',121:'y',122:'z',123:'{',124:'|',125:'}',126:'~',127:'\x7f',128:'\xC4',129:'\xC5',130:'\xC7',131:'\xC9',132:'\xD1',133:'\xD6',134:'\xDC',135:'\xE1',136:'\xE0',137:'\xE2',138:'\xE4',139:'\xE3',140:'\xE5',141:'\xE7',142:'\xE9',143:'\xE8',144:'\xEA',145:'\xEB',146:'\xED',147:'\xEC',148:'\xEE',149:'\xEF',150:'\xF1',151:'\xF3',152:'\xF2',153:'\xF4',154:'\xF6',155:'\xF5',156:'\xFA',157:'\xF9',158:'\xFB',159:'\xFC',160:'\u2020',161:'\xB0',162:'\xA2',163:'\xA3',164:'\xA7',165:'\u2022',166:'\xB6',167:'\xDF',168:'\xAE',169:'\xA9',170:'\u2122',171:'\xB4',172:'\xA8',173:'\u2260',174:'\xC6',175:'\xD8',176:'\u221E',177:'\xB1',178:'\u2264',179:'\u2265',180:'\xA5',181:'\xB5',182:'\u2202',183:'\u2211',184:'\u220F',185:'\u03C0',186:'\u222B',187:'\xAA',188:'\xBA',189:'\u2126',190:'\xE6',191:'\xF8',192:'\xBF',193:'\xA1',194:'\xAC',195:'\u221A',196:'\u0192',197:'\u2248',198:'\u2206',199:'\xAB',200:'\xBB',201:'\u2026',202:'\xA0',203:'\xC0',204:'\xC3',205:'\xD5',206:'\u0152',207:'\u0153',208:'\u2013',209:'\u2014',210:'\u201C',211:'\u201D',212:'\u2018',213:'\u2019',214:'\xF7',215:'\u25CA',216:'\xFF',217:'\u0178',218:'\u011E',219:'\u011F',220:'\u0130',221:'\u0131',222:'\u015E',223:'\u015F',224:'\u2021',225:'\xB7',226:'\u201A',227:'\u201E',228:'\u2030',229:'\xC2',230:'\xCA',231:'\xC1',232:'\xCB',233:'\xC8',234:'\xCD',235:'\xCE',236:'\xCF',237:'\xCC',238:'\xD3',239:
# Based on https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT

__all__ = [
    'decodingTable',
]


# Maps every byte value that CP874 defines to the character it decodes to.
# Keys are the raw byte values (0-255), values are single-character strings.
#
# Byte values that CP874.TXT marks as undefined (0x81-0x84, 0x86-0x90,
# 0x98-0x9F, 0xDB-0xDE and 0xFC-0xFF) are intentionally absent.
#
# The euro sign belongs to byte 0x80 (not 0x81), and the run that starts with
# the baht sign U+0E3F begins at byte 0xDF (223) and ends with U+0E5B at byte
# 0xFB (251). In the Thai range the character is always U+0E00 + (byte - 0xA0).
decodingTable = {
    # 0x00-0x7F: identical to ASCII.
    0: '\x00', 1: '\x01', 2: '\x02', 3: '\x03', 4: '\x04', 5: '\x05', 6: '\x06', 7: '\x07',
    8: '\x08', 9: '\t', 10: '\n', 11: '\x0B', 12: '\x0C', 13: '\r', 14: '\x0E', 15: '\x0F',
    16: '\x10', 17: '\x11', 18: '\x12', 19: '\x13', 20: '\x14', 21: '\x15', 22: '\x16', 23: '\x17',
    24: '\x18', 25: '\x19', 26: '\x1A', 27: '\x1B', 28: '\x1C', 29: '\x1D', 30: '\x1E', 31: '\x1F',
    32: ' ', 33: '!', 34: '"', 35: '#', 36: '$', 37: '%', 38: '&', 39: "'",
    40: '(', 41: ')', 42: '*', 43: '+', 44: ',', 45: '-', 46: '.', 47: '/',
    48: '0', 49: '1', 50: '2', 51: '3', 52: '4', 53: '5', 54: '6', 55: '7',
    56: '8', 57: '9', 58: ':', 59: ';', 60: '<', 61: '=', 62: '>', 63: '?',
    64: '@', 65: 'A', 66: 'B', 67: 'C', 68: 'D', 69: 'E', 70: 'F', 71: 'G',
    72: 'H', 73: 'I', 74: 'J', 75: 'K', 76: 'L', 77: 'M', 78: 'N', 79: 'O',
    80: 'P', 81: 'Q', 82: 'R', 83: 'S', 84: 'T', 85: 'U', 86: 'V', 87: 'W',
    88: 'X', 89: 'Y', 90: 'Z', 91: '[', 92: '\\', 93: ']', 94: '^', 95: '_',
    96: '`', 97: 'a', 98: 'b', 99: 'c', 100: 'd', 101: 'e', 102: 'f', 103: 'g',
    104: 'h', 105: 'i', 106: 'j', 107: 'k', 108: 'l', 109: 'm', 110: 'n', 111: 'o',
    112: 'p', 113: 'q', 114: 'r', 115: 's', 116: 't', 117: 'u', 118: 'v', 119: 'w',
    120: 'x', 121: 'y', 122: 'z', 123: '{', 124: '|', 125: '}', 126: '~', 127: '\x7F',
    # 0x80-0x9F: the few punctuation marks Windows adds to TIS-620.
    128: '\u20AC',
    133: '\u2026',
    145: '\u2018', 146: '\u2019', 147: '\u201C', 148: '\u201D', 149: '\u2022', 150: '\u2013', 151: '\u2014',
    # 0xA0-0xDA: no-break space followed by the first Thai run.
    160: '\xA0', 161: '\u0E01', 162: '\u0E02', 163: '\u0E03', 164: '\u0E04', 165: '\u0E05', 166: '\u0E06', 167: '\u0E07',
    168: '\u0E08', 169: '\u0E09', 170: '\u0E0A', 171: '\u0E0B', 172: '\u0E0C', 173: '\u0E0D', 174: '\u0E0E', 175: '\u0E0F',
    176: '\u0E10', 177: '\u0E11', 178: '\u0E12', 179: '\u0E13', 180: '\u0E14', 181: '\u0E15', 182: '\u0E16', 183: '\u0E17',
    184: '\u0E18', 185: '\u0E19', 186: '\u0E1A', 187: '\u0E1B', 188: '\u0E1C', 189: '\u0E1D', 190: '\u0E1E', 191: '\u0E1F',
    192: '\u0E20', 193: '\u0E21', 194: '\u0E22', 195: '\u0E23', 196: '\u0E24', 197: '\u0E25', 198: '\u0E26', 199: '\u0E27',
    200: '\u0E28', 201: '\u0E29', 202: '\u0E2A', 203: '\u0E2B', 204: '\u0E2C', 205: '\u0E2D', 206: '\u0E2E', 207: '\u0E2F',
    208: '\u0E30', 209: '\u0E31', 210: '\u0E32', 211: '\u0E33', 212: '\u0E34', 213: '\u0E35', 214: '\u0E36', 215: '\u0E37',
    216: '\u0E38', 217: '\u0E39', 218: '\u0E3A',
    # 0xDF-0xFB: baht sign followed by the second Thai run.
    223: '\u0E3F',
    224: '\u0E40', 225: '\u0E41', 226: '\u0E42', 227: '\u0E43', 228: '\u0E44', 229: '\u0E45', 230: '\u0E46', 231: '\u0E47',
    232: '\u0E48', 233: '\u0E49', 234: '\u0E4A', 235: '\u0E4B', 236: '\u0E4C', 237: '\u0E4D', 238: '\u0E4E', 239: '\u0E4F',
    240: '\u0E50', 241: '\u0E51', 242: '\u0E52', 243: '\u0E53', 244: '\u0E54', 245: '\u0E55', 246: '\u0E56', 247: '\u0E57',
    248: '\u0E58', 249: '\u0E59', 250: '\u0E5A', 251: '\u0E5B',
}
# I would want this to also be a subclass of NotImplementedError, but Python
# docs say that CPython can make that a bit problematic due to things from the C
# side of the code.
class FeatureNotImplemented(ExMsgBaseException):
    """
    The base class for a feature not yet being implemented in the module.
    """


# More specific exceptions.


class ConversionError(ExMsgBaseException):
    """
    An error occurred during type conversion.
    """

class DataNotFoundError(ExMsgBaseException):
    """
    Requested stream type was unavailable.
    """

class DeencapMalformedData(ExMsgBaseException):
    """
    Data to deencapsulate was malformed in some way.
    """

class DeencapNotEncapsulated(ExMsgBaseException):
    """
    Data to deencapsulate did not contain any encapsulated data.
    """

class DependencyError(ExMsgBaseException):
    """
    An optional dependency could not be found or was unable to be used as
    expected.
    """

class ExecutableNotFound(DependencyError):
    """
    Could not find the specified executable.
    """

class IncompatibleOptionsError(ExMsgBaseException):
    """
    Provided options are incompatible with each other.
    """

class InvalidFileFormatError(ExMsgBaseException):
    """
    An Invalid File Format Error occurred.
    """

class InvalidPropertyIdError(ExMsgBaseException):
    """
    The provided property ID was invalid.
    """

class MimetypeFailureError(ExMsgBaseException):
    """
    The mimetype was unable to be properly determined when it was mandatory.
    """

class NotWritableError(ExMsgBaseException):
    """
    Modification was attempted on an instance that is not writable.
    """

class PrefixError(ExMsgBaseException):
    """
    An issue was detected with the provided prefix.

    This should never occur if you have not manually provided a prefix.
    """

class SecurityError(ExMsgBaseException):
    """
    A code path was triggered that would use an insecure feature, but that
    insecure feature was not enabled.
    """

class StandardViolationError(InvalidFileFormatError):
    """
    A critical violation of the MSG standards was detected and could not be
    recovered from.

    Recoverable violations will result in log messages instead.

    Any that could reasonably be skipped, although are likely to still cause
    errors down the line, can be suppressed.
    """

class TooManySectorsError(ExMsgBaseException):
    """
    Ole writer has too much data to write to the file.
    """

class TZError(ExMsgBaseException):
    """
    Specifically not an OSError to avoid being caught by parts of the module.

    This error represents a fatal error in the datetime parsing as it usually
    means your installation of tzlocal or tzdata is broken. If you have
    received this error after using PyInstaller, you must include the resource
    files for tzdata for it to work properly. See TeamMsgExtractor#272 and
    TeamMsgExtractor#169 for information on why you are getting this error.
    """

class UnknownCodepageError(ExMsgBaseException):
    """
    The codepage provided was not one we know of.
    """

class UnsupportedEncodingError(FeatureNotImplemented):
    """
    The codepage provided is known but is not supported.
    """

class UnknownTypeError(ExMsgBaseException):
    """
    The type specified is not one that is recognized.
    """

class UnsupportedMSGTypeError(FeatureNotImplemented):
    """
    An exception that is raised when an MSG class is recognized but not
    supported.
    """

class UnrecognizedMSGTypeError(ExMsgBaseException):
    """
    An exception that is raised when the module cannot determine how to properly
    open a specific class of MSG file.
    """

class WKError(DependencyError):
    """
    An error occurred while running wkhtmltopdf.
    """
class Calendar(CalendarBase):
    """
    A calendar object: everything from CalendarBase plus the client intent and
    reminder named properties.
    """

    @functools.cached_property
    def clientIntent(self) -> Optional[ClientIntentFlag]:
        """
        The set of actions a user has taken on a Meeting object, as
        ClientIntentFlag.
        """
        intent = self.getNamedAs('0015', ps.PSETID_CALENDAR_ASSISTANT, ClientIntentFlag)
        return intent

    @functools.cached_property
    def fExceptionalAttendees(self) -> Optional[bool]:
        """
        Indicates a Recurring Calendar object with one or more exceptions
        where at least one of the Exception Embedded Message objects has at
        least one RecipientRow structure.

        SHOULD NOT be set on any Calendar object other than the organizer's.
        """
        hasExceptionalAttendees = self.getNamedProp('822B', ps.PSETID_APPOINTMENT)
        return hasExceptionalAttendees

    @functools.cached_property
    def reminderDelta(self) -> Optional[int]:
        """
        The number of minutes between the moment the reminder first becomes
        overdue and the start time of the Calendar object.
        """
        delta = self.getNamedProp('8501', ps.PSETID_COMMON)
        return delta

    @functools.cached_property
    def reminderFileParameter(self) -> Optional[str]:
        """
        The full path (MAY be only the file name) of the sound a client SHOULD
        play when the reminder for the Message Object becomes overdue.
        """
        soundPath = self.getNamedProp('851F', ps.PSETID_COMMON)
        return soundPath

    @functools.cached_property
    def reminderOverride(self) -> bool:
        """
        Whether clients SHOULD respect the values of the reminderPlaySound and
        reminderFileParameter properties.
        """
        rawValue = self.getNamedProp('851C', ps.PSETID_COMMON)
        return bool(rawValue)

    @functools.cached_property
    def reminderPlaySound(self) -> bool:
        """
        Whether the client should play a sound when the reminder becomes
        overdue.
        """
        rawValue = self.getNamedProp('851E', ps.PSETID_COMMON)
        return bool(rawValue)

    @functools.cached_property
    def reminderSet(self) -> bool:
        """
        Whether a reminder is set on the object.
        """
        rawValue = self.getNamedProp('8503', ps.PSETID_COMMON)
        return bool(rawValue)

    @functools.cached_property
    def reminderSignalTime(self) -> Optional[datetime.datetime]:
        """
        The moment at which a reminder goes from pending to overdue.
        """
        signalTime = self.getNamedProp('8560', ps.PSETID_COMMON)
        return signalTime

    @functools.cached_property
    def reminderTime(self) -> Optional[datetime.datetime]:
        """
        The time after which the user would be late.
        """
        lateAfter = self.getNamedProp('8502', ps.PSETID_COMMON)
        return lateAfter
def getJson(self) -> str:
    """
    Return the main details of the journal entry as a JSON string.

    The JSON object has the keys ``subject``, ``entryType``, ``company``
    (the first entry of ``companies``, if there is one), ``start``, ``end``,
    ``duration``, ``body`` and ``rtfBodyB64``. The start and end are rendered
    with ``datetimeFormat``; values that are not set are emitted as ``null``.
    """
    return json.dumps({
        'subject': self.subject,
        'entryType': self.logTypeDesc,
        'company': self.companies[0] if self.companies else None,
        'start': self.logStart.__format__(self.datetimeFormat) if self.logStart else None,
        'end': self.logEnd.__format__(self.datetimeFormat) if self.logEnd else None,
        # The activity length in minutes is exposed by this class as
        # logDuration; the class defines no "duration" property.
        'duration': minutesToDurationStr(self.logDuration),
        'body': self.body,
        # There is a good chance the body property won't exist, so this is a
        # backup.
        'rtfBodyB64': base64.b64encode(self.rtfBody).decode('ascii') if self.rtfBody else None,
    })
@functools.cached_property
def logDuration(self) -> int:
    """
    The duration, in minutes, of the activity.

    Read from named property 8707 in PSETID_LOG; the trailing ``0`` is
    presumably the value used when the property is absent (confirm against
    getNamedProp).
    """
    return self.getNamedProp('8707', ps.PSETID_LOG, 0)

@functools.cached_property
def logEnd(self) -> Optional[datetime.datetime]:
    """
    The date and time at which the recorded activity ended (PidLidLogEnd in
    [MS-OXOJRNL]).

    Read from named property 8708 in PSETID_LOG.
    """
    return self.getNamedProp('8708', ps.PSETID_LOG)

@functools.cached_property
def logFlags(self) -> LogFlags:
    """
    Metadata flags for the Journal object (PidLidLogFlags in [MS-OXOJRNL]).

    Read from named property 870C in PSETID_LOG and wrapped in LogFlags; the
    trailing ``0`` is presumably the value used when the property is absent
    (confirm against getNamedProp).
    """
    return LogFlags(self.getNamedProp('870C', ps.PSETID_LOG, 0))

@functools.cached_property
def logStart(self) -> Optional[datetime.datetime]:
    """
    The date and time at which the recorded activity began (PidLidLogStart in
    [MS-OXOJRNL]).

    Read from named property 8706 in PSETID_LOG.
    """
    return self.getNamedProp('8706', ps.PSETID_LOG)

@functools.cached_property
def logType(self) -> Optional[str]:
    """
    The name of the activity that is being recorded.

    Read from named property 8700 in PSETID_LOG.
    """
    return self.getNamedProp('8700', ps.PSETID_LOG)
def getJson(self) -> str:
    """
    Return the details of the meeting cancellation as a JSON string.

    The JSON object has the keys ``recurrence``, ``recurrencePattern``,
    ``body``, ``meetingStatus``, ``organizer``, ``requiredAttendees``,
    ``optionalAttendees``, ``resources``, ``start`` and ``end``. The start and
    end are rendered with ``datetimeFormat`` and are ``null`` when not set.

    :raises KeyError: If ``responseStatus`` or the recurrence pattern type is
        not one of the values listed in the lookup tables below.
    """
    meetingStatusString = {
        ResponseStatus.NONE: None,
        ResponseStatus.ORGANIZED: 'Meeting organizer',
        ResponseStatus.TENTATIVE: 'Tentatively accepted',
        ResponseStatus.ACCEPTED: 'Accepted',
        ResponseStatus.DECLINED: 'Declined',
        ResponseStatus.NOT_RESPONDED: 'Not yet responded',
    }[self.responseStatus]

    # Get the recurrence string.
    recur = '(none)'
    if self.appointmentRecur:
        recur = {
            RecurPatternType.DAY: 'Daily',
            RecurPatternType.WEEK: 'Weekly',
            RecurPatternType.MONTH: 'Monthly',
            RecurPatternType.MONTH_NTH: 'Monthly',
            RecurPatternType.MONTH_END: 'Monthly',
            RecurPatternType.HJ_MONTH: 'Monthly',
            RecurPatternType.HJ_MONTH_NTH: 'Monthly',
            RecurPatternType.HJ_MONTH_END: 'Monthly',
        }[self.appointmentRecur.patternType]

    return json.dumps({
        'recurrence': recur,
        'recurrencePattern': self.recurrencePattern,
        'body': self.body,
        'meetingStatus': meetingStatusString,
        'organizer': self.organizer,
        'requiredAttendees': self.to,
        'optionalAttendees': self.cc,
        'resources': self.bcc,
        # Each date is guarded by its own value: checking endDate here would
        # fail on a missing startDate and hide a valid one when only the end
        # is missing.
        'start': self.startDate.__format__(self.datetimeFormat) if self.startDate else None,
        'end': self.endDate.__format__(self.datetimeFormat) if self.endDate else None,
    })
class MeetingException(MeetingRelated):
    """
    Handler for Meeting Exception objects.
    """

    def save(self, **_) -> constants.SAVE_TYPE:
        """
        Do nothing: Meeting Exceptions are hidden attachments that have no
        save behavior of their own.

        Every keyword argument is accepted and ignored. To make saving do
        something, call the save method of a parent class or write your own
        code.
        """
        return SaveType.NONE, None

    @functools.cached_property
    def exceptionReplaceTime(self) -> Optional[datetime.datetime]:
        """
        The date and time, inside the recurrence pattern, that this exception
        replaces.

        The value is specified in UTC.
        """
        replaceTime = self.getNamedProp('8228', constants.ps.PSETID_APPOINTMENT)
        return replaceTime

    @functools.cached_property
    def fExceptionalBody(self) -> bool:
        """
        Whether the Exception Embedded Message object has a body that differs
        from the one on the Recurring Calendar object.

        When ``True``, the Exception MUST have a body.
        """
        rawValue = self.getNamedProp('8206', constants.ps.PSETID_APPOINTMENT)
        return bool(rawValue)

    @functools.cached_property
    def fInvited(self) -> bool:
        """
        Whether invitations have been sent for this exception.
        """
        rawValue = self.getNamedProp('8229', constants.ps.PSETID_APPOINTMENT)
        return bool(rawValue)
def getJson(self) -> str:
    """
    Return the details of the meeting forward notification as a JSON string.

    The JSON object has the keys ``recurrence``, ``recurrencePattern``,
    ``body``, ``meetingStatus``, ``organizer``, ``requiredAttendees``,
    ``optionalAttendees``, ``resources``, ``start`` and ``end``. The start and
    end are rendered with ``datetimeFormat`` and are ``null`` when not set.

    :raises KeyError: If ``responseStatus`` or the recurrence pattern type is
        not one of the values listed in the lookup tables below.
    """
    meetingStatusString = {
        ResponseStatus.NONE: None,
        ResponseStatus.ORGANIZED: 'Meeting organizer',
        ResponseStatus.TENTATIVE: 'Tentatively accepted',
        ResponseStatus.ACCEPTED: 'Accepted',
        ResponseStatus.DECLINED: 'Declined',
        ResponseStatus.NOT_RESPONDED: 'Not yet responded',
    }[self.responseStatus]

    # Get the recurrence string.
    recur = '(none)'
    if self.appointmentRecur:
        recur = {
            RecurPatternType.DAY: 'Daily',
            RecurPatternType.WEEK: 'Weekly',
            RecurPatternType.MONTH: 'Monthly',
            RecurPatternType.MONTH_NTH: 'Monthly',
            RecurPatternType.MONTH_END: 'Monthly',
            RecurPatternType.HJ_MONTH: 'Monthly',
            RecurPatternType.HJ_MONTH_NTH: 'Monthly',
            RecurPatternType.HJ_MONTH_END: 'Monthly',
        }[self.appointmentRecur.patternType]

    return json.dumps({
        'recurrence': recur,
        'recurrencePattern': self.recurrencePattern,
        'body': self.body,
        'meetingStatus': meetingStatusString,
        'organizer': self.organizer,
        'requiredAttendees': self.to,
        'optionalAttendees': self.cc,
        'resources': self.bcc,
        # Each date is guarded by its own value: checking endDate here would
        # fail on a missing startDate and hide a valid one when only the end
        # is missing.
        'start': self.startDate.__format__(self.datetimeFormat) if self.startDate else None,
        'end': self.endDate.__format__(self.datetimeFormat) if self.endDate else None,
    })
72 | recur = '(none)' 73 | if self.appointmentRecur: 74 | recur = { 75 | RecurPatternType.DAY: 'Daily', 76 | RecurPatternType.WEEK: 'Weekly', 77 | RecurPatternType.MONTH: 'Monthly', 78 | RecurPatternType.MONTH_NTH: 'Monthly', 79 | RecurPatternType.MONTH_END: 'Monthly', 80 | RecurPatternType.HJ_MONTH: 'Monthly', 81 | RecurPatternType.HJ_MONTH_NTH: 'Monthly', 82 | RecurPatternType.HJ_MONTH_END: 'Monthly', 83 | }[self.appointmentRecur.patternType] 84 | 85 | return { 86 | '-main info-': { 87 | 'Subject': self.subject, 88 | 'Location': self.location, 89 | }, 90 | '-date-': { 91 | 'Start': self.startDate.__format__(self.datetimeFormat) if self.startDate else None, 92 | 'End': self.endDate.__format__(self.datetimeFormat) if self.endDate else None, 93 | }, 94 | '-recurrence-': { 95 | 'Recurrance': recur, 96 | 'Recurrence Pattern': self.recurrencePattern, 97 | }, 98 | '-attendees-': { 99 | 'Organizer': self.organizer, 100 | 'Required Attendees': self.to, 101 | 'Optional Attendees': self.cc, 102 | 'Resources': self.bcc, 103 | }, 104 | '-importance-': { 105 | 'Importance': self.importanceString, 106 | }, 107 | } 108 | 109 | @functools.cached_property 110 | def promptSendUpdate(self) -> bool: 111 | """ 112 | Indicates that the Meeting Forward Notification object was out-of-date 113 | when it was received. 114 | """ 115 | return bool(self.getNamedProp('8045', constants.ps.PSETID_COMMON)) 116 | -------------------------------------------------------------------------------- /extract_msg/msg_classes/meeting_related.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | 'MeetingRelated', 3 | ] 4 | 5 | 6 | import datetime 7 | import functools 8 | 9 | from typing import Optional 10 | 11 | from ..constants import ps 12 | from .calendar_base import CalendarBase 13 | from ..enums import ServerProcessingAction 14 | 15 | 16 | class MeetingRelated(CalendarBase): 17 | """ 18 | Base class for meeting-related objects. 
19 | """ 20 | 21 | @functools.cached_property 22 | def attendeeCriticalChange(self) -> Optional[datetime.datetime]: 23 | """ 24 | The date and time at which the meeting-related object was sent. 25 | """ 26 | return self.getNamedProp('0001', ps.PSETID_MEETING) 27 | 28 | @functools.cached_property 29 | def processed(self) -> bool: 30 | """ 31 | Indicates whether a client has processed a meeting-related object. 32 | """ 33 | return bool(self.getPropertyVal('7D01000B')) 34 | 35 | @functools.cached_property 36 | def serverProcessed(self) -> bool: 37 | """ 38 | Indicates that the Meeting Request object or Meeting Update object has 39 | been processed. 40 | """ 41 | return bool(self.getNamedProp('85CC', ps.PSETID_CALENDAR_ASSISTANT)) 42 | 43 | @functools.cached_property 44 | def serverProcessingActions(self) -> Optional[ServerProcessingAction]: 45 | """ 46 | A union of which actions have been taken on the Meeting Request object 47 | or Meeting Update object. 48 | """ 49 | return self.getNamedAs('85CD', ps.PSETID_CALENDAR_ASSISTANT, ServerProcessingAction) 50 | 51 | @functools.cached_property 52 | def timeZone(self) -> Optional[int]: 53 | """ 54 | Specifies information about the time zone of a recurring meeting. 55 | 56 | See PidLidTimeZone in [MS-OXOCAL] for details. 57 | """ 58 | return self.getNamedProp('000C', ps.PSETID_MEETING) 59 | 60 | @functools.cached_property 61 | def where(self) -> Optional[str]: 62 | """ 63 | PidLidWhere. Should be the same as location. 
class MeetingResponse(MeetingRelated):
    """
    Handler for Meeting Response objects.
    """

    @functools.cached_property
    def appointmentCounterProposal(self) -> bool:
        """
        Whether this response is a counter proposal.
        """
        rawValue = self.getNamedProp('8257', ps.PSETID_APPOINTMENT)
        return bool(rawValue)

    @functools.cached_property
    def appointmentProposedDuration(self) -> Optional[int]:
        """
        For a counter proposal, the value proposed for the
        appointmentDuration property.
        """
        proposedDuration = self.getNamedProp('8256', ps.PSETID_APPOINTMENT)
        return proposedDuration

    @functools.cached_property
    def appointmentProposedEndWhole(self) -> Optional[datetime.datetime]:
        """
        For a counter proposal, the value proposed for the
        appointmentEndWhole property.
        """
        proposedEnd = self.getNamedProp('8251', ps.PSETID_APPOINTMENT)
        return proposedEnd

    @functools.cached_property
    def appointmentProposedStartWhole(self) -> Optional[datetime.datetime]:
        """
        For a counter proposal, the value proposed for the
        appointmentStartWhole property.
        """
        proposedStart = self.getNamedProp('8250', ps.PSETID_APPOINTMENT)
        return proposedStart

    @functools.cached_property
    def isSilent(self) -> bool:
        """
        Whether the user left the body of the Meeting Response object without
        any text.
        """
        rawValue = self.getNamedProp('0004', ps.PSETID_MEETING)
        return bool(rawValue)

    @functools.cached_property
    def promptSendUpdate(self) -> bool:
        """
        Whether the Meeting Response object was already out-of-date at the
        time it was received.
        """
        rawValue = self.getNamedProp('8045', ps.PSETID_COMMON)
        return bool(rawValue)

    @functools.cached_property
    def responseType(self) -> ResponseType:
        """
        The kind of Meeting Response object this is.
        """
        # The kind of response is encoded as the final dot-separated segment
        # of the class type, compared case-insensitively.
        suffix = self.classType.lower().rpartition('.')[2]
        return ResponseType(suffix)
# Note: Sticky note is basically just text and a background color, so we don't
# really do much when saving it.
class StickyNote(MessageBase):
    """
    A sticky note.
    """

    def getJson(self) -> str:
        """
        Returns the JSON representation of the sticky note.

        The date is formatted with :attr:`datetimeFormat` and the color is
        given as the lowercase name of its :class:`NoteColor` member. Either
        is ``null`` when not set.
        """
        return json.dumps({
            'subject': self.subject,
            'date': self.date.__format__(self.datetimeFormat) if self.date else None,
            'body': self.body,
            'height': self.noteHeight,
            'width': self.noteWidth,
            'color': None if self.noteColor is None else self.noteColor.name.lower(),
        })

    @property
    def headerFormatProperties(self) -> constants.HEADER_FORMAT_TYPE:
        # A sticky note has no header fields to format.
        return None

    @functools.cached_property
    def noteColor(self) -> Optional[NoteColor]:
        """
        The color of the sticky note.
        """
        return self.getNamedAs('8B00', constants.ps.PSETID_NOTE, NoteColor)

    @functools.cached_property
    def noteHeight(self) -> Optional[int]:
        """
        The height of the note window, in pixels.
        """
        return self.getNamedProp('8B03', constants.ps.PSETID_NOTE)

    @functools.cached_property
    def noteWidth(self) -> Optional[int]:
        """
        The width of the note window, in pixels.
        """
        return self.getNamedProp('8B02', constants.ps.PSETID_NOTE)

    @functools.cached_property
    def noteX(self) -> Optional[int]:
        """
        The distance, in pixels, from the left edge of the screen that a user
        interface displays the note.
        """
        # PidLidNoteX is 0x8B04. 0x8B02 is the note width and must not be
        # reused here.
        return self.getNamedProp('8B04', constants.ps.PSETID_NOTE)

    @functools.cached_property
    def noteY(self) -> Optional[int]:
        """
        The distance, in pixels, from the top edge of the screen that a user
        interface displays the note.
        """
        # PidLidNoteY is 0x8B05.
        return self.getNamedProp('8B05', constants.ps.PSETID_NOTE)
import constants 12 | from ..enums import ErrorBehavior, TaskMode, TaskRequestType 13 | from ..exceptions import StandardViolationError 14 | from .message_base import MessageBase 15 | from .task import Task 16 | 17 | 18 | logger = logging.getLogger(__name__) 19 | logger.addHandler(logging.NullHandler()) 20 | 21 | 22 | class TaskRequest(MessageBase): 23 | """ 24 | Class for handling Task Request objects, including Task Accept, Task 25 | Decline, and Task Update. 26 | """ 27 | 28 | @property 29 | def headerFormatProperties(self) -> constants.HEADER_FORMAT_TYPE: 30 | # So this is rather weird. Looks like TaskRequest does not rely on 31 | # headers at all, simply using the body itself for all the data to 32 | # print. So I guess we just return None and handle that. 33 | return None 34 | 35 | @functools.cached_property 36 | def processed(self) -> bool: 37 | """ 38 | Indicates whether a client has already processed a received task 39 | communication. 40 | """ 41 | return bool(self.getPropertyVal('7D01000B')) 42 | 43 | @functools.cached_property 44 | def taskMode(self) -> Optional[TaskMode]: 45 | """ 46 | The assignment status of the embedded Task object. 47 | """ 48 | return self.getNamedAs('8518', constants.ps.PSETID_COMMON, TaskMode) 49 | 50 | @functools.cached_property 51 | def taskObject(self) -> Optional[Task]: 52 | """ 53 | The task object embedded in this Task Request object. 54 | 55 | This function does all of the most basic validation, and so will log 56 | most issues or throw exceptions if there are too many problems. 57 | 58 | :raises StandardViolationError: A standard was blatently violated in a 59 | way that program does not tolerate. 60 | """ 61 | # Get the task object. 62 | # 63 | # The task object MUST be the first attachment, but we will be 64 | # lenient and allow it to be in any position. It not existing, 65 | # however, will not be tolerated. 
class NullDate(datetime.datetime):
    """
    Version of datetime.datetime intended to represent a value of NULL.

    Some properties use different values for a null date, and those need to be
    differentiated when packing the data back into an MSG file, which is why
    this class exists. Comparisons between NullDate instances will always say
    the two dates are equal.

    :attribute filetime: An optional value that can be set to the filetime a
        null date should convert back to.
    """

    filetime: Optional[int] = None

    # NOTE: Because __eq__ is overridden without defining __hash__, instances
    # of this class are unhashable (standard Python data model behavior).

    def __eq__(self, other) -> bool:
        """
        Two NullDate instances are always equal. Any other operand is compared
        using the regular datetime rules.
        """
        if isinstance(other, NullDate):
            return True
        return super().__eq__(other)

    def __ne__(self, other) -> bool:
        """
        Exact inverse of :meth:`__eq__`: never unequal to another NullDate,
        regular datetime inequality for any other operand.
        """
        if isinstance(other, NullDate):
            return False
        # Must delegate to __ne__ here. Delegating to __eq__ would report a
        # NullDate as "not equal" to the datetime it is actually equal to.
        return super().__ne__(other)
4 | """ 5 | 6 | __all__ = [ 7 | '_helpers', 8 | 'contact_link_entry', 9 | 'business_card', 10 | 'cfoas', 11 | 'contact_link_entry', 12 | 'dev_mode_a', 13 | 'dv_target_device', 14 | 'entry_id', 15 | 'misc_id', 16 | 'mon_stream', 17 | 'odt', 18 | 'ole_pres', 19 | 'ole_stream_struct', 20 | 'recurrence_pattern', 21 | 'report_tag', 22 | 'system_time', 23 | 'time_zone_definition', 24 | 'time_zone_struct', 25 | 'toc_entry', 26 | 'tz_rule', 27 | ] 28 | 29 | from . import ( 30 | _helpers, business_card, cfoas, contact_link_entry, dev_mode_a, 31 | dv_target_device, entry_id, misc_id, mon_stream, odt, ole_pres, 32 | ole_stream_struct, recurrence_pattern, report_tag, system_time, 33 | time_zone_definition, time_zone_struct, toc_entry, tz_rule 34 | ) -------------------------------------------------------------------------------- /extract_msg/structures/cfoas.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | 'ClipboardFormatOrAnsiString', 3 | ] 4 | 5 | 6 | from typing import Optional, Union 7 | 8 | from .. 
@property
def ansiString(self) -> Optional[bytes]:
    """
    The null-terminated ANSI string, as bytes, of the name of a registered
    clipboard format. Only set if markerOrLength is not ``0x00000000``,
    ``0xFFFFFFFE``, or ``0xFFFFFFFF``.

    Setting this will modify the markerOrLength field automatically and clear
    the clipboard format, since the two fields are mutually exclusive.

    :raises ValueError: The value was empty or ``None``, or was too long for
        its length to be stored in the markerOrLength field.
    """
    return self.__ansiString

@ansiString.setter
def ansiString(self, val: bytes) -> None:
    if not val:
        raise ValueError('Cannot set :property ansiString: to None or empty bytes.')
    # Lengths above 0xFFFFFFFD collide with the clipboard format markers.
    if len(val) > 0xFFFFFFFD:
        raise ValueError(':property ansiString: is too long to be stored.')

    self.__ansiString = val
    # Keep the other fields consistent with the new string so that the
    # serialized form writes the correct length and no stale format.
    self.__clipboardFormat = None
    self.__markerOrLength = len(val)
68 | 69 | To set this, make sure that :property markerOrLength: is ``0xFFFFFFFE`` 70 | or ``0xFFFFFFFF`` *before* setting. 71 | """ 72 | return self.__clipboardFormat 73 | 74 | @clipboardFormat.setter 75 | def clipboardFormat(self, val: ClipboardFormat) -> None: 76 | if not val: 77 | raise ValueError('Cannot set clipboard format to None.') 78 | if self.markerOrLength < 0xFFFFFFFE: 79 | raise ValueError('Cannot set the clipboard format while the marker or length is not 0xFFFFFFFE or 0xFFFFFFFF') 80 | self.__clipboardFormat = val 81 | 82 | @property 83 | def markerOrLength(self) -> int: 84 | """ 85 | If set the 0x00000000, then neither the format property nor the 86 | ansiString property will be set. If it is 0xFFFFFFFF or 0xFFFFFFFE, then 87 | the clipboardFormat property will be set. Otherwise, the ansiString 88 | property 89 | will be set. 90 | """ 91 | return self.__markerOrLength 92 | 93 | @markerOrLength.setter 94 | def markerOrLength(self, val: int) -> None: 95 | if val < 0: 96 | raise ValueError(':property markerOrLength: must be a positive integer.') 97 | if val > 0xFFFFFFFF: 98 | raise ValueError(':property markerOrLength: cannot be greater than 0xFFFFFFFF') 99 | 100 | if val == 0: 101 | self.__ansiString = None 102 | self.__clipboardFormat = None 103 | elif val > 0xFFFFFFFD: 104 | self.__ansiString = None 105 | self.__clipboardFormat = ClipboardFormat.CF_BITMAP 106 | else: 107 | raise ValueError('Cannot set :property markerOrLength: to a length value. 
class ContactLinkEntry:
    """
    A counted list of EntryID structures.

    The layout handled here is: a 4 byte entry count, a 4 byte field that is
    ignored on read (and written as the byte length of the entry data that
    follows), then for each entry a 4 byte size, the entry bytes, and zero
    padding up to the next multiple of 4 bytes.
    """

    entries: List[EntryID]

    def __init__(self, data: bytes):
        # My experience with this data almost entirely doesn't match the
        # documentation, so I'm just going to do what I see and not what I'm
        # told.
        reader = BytesReader(data)
        count = reader.readUnsignedInt()
        # Ignore this field.
        reader.read(4)
        self.entries = []
        for _ in range(count):
            size = reader.readUnsignedInt()
            self.entries.append(EntryID.autoCreate(reader.read(size)))
            # Skip the padding that aligns the next entry to 4 bytes.
            if (size & 3) != 0:
                reader.read(4 - (size & 3))

    def __bytes__(self) -> bytes:
        return self.toBytes()

    def toBytes(self) -> bytes:
        """
        Packs the entries back into bytes, mirroring the layout that
        :meth:`__init__` reads.
        """
        # The entry data has to be built first, as its total length is written
        # before it.
        parts = []
        for entry in self.entries:
            entryData = entry.toBytes()
            edLen = len(entryData)
            # Size goes before data.
            parts.append(st.ST_LE_UI32.pack(edLen))
            parts.append(entryData)
            # Pad to a multiple of 4 bytes. The amount depends only on the
            # remainder of the length, exactly as when reading.
            if edLen & 3:
                parts.append(b'\x00' * (4 - (edLen & 3)))

        data = b''.join(parts)

        return b''.join((
            st.ST_LE_UI32.pack(len(self.entries)),
            st.ST_LE_UI32.pack(len(data)),
            data,
        ))
46 | """ 47 | return self.__streamData 48 | 49 | @streamData.setter 50 | def streamData(self, data: bytes) -> None: 51 | if not isinstance(data, bytes): 52 | raise TypeError('Stream data MUST be bytes.') 53 | 54 | self.__streamData = data 55 | 56 | -------------------------------------------------------------------------------- /extract_msg/structures/odt.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | 'ODTStruct', 3 | ] 4 | 5 | import struct 6 | from typing import final, Optional 7 | 8 | from ..enums import ODTCf, ODTPersist1, ODTPersist2 9 | 10 | 11 | @final 12 | class ODTStruct: 13 | def __init__(self, data: Optional[bytes] = None): 14 | if data: 15 | values = struct.unpack('= 6: 19 | self.__persist2 = ODTPersist2(struct.unpack(' bytes: 28 | return self.toBytes() 29 | 30 | def toBytes(self) -> bytes: 31 | return struct.pack(' ODTCf: 35 | """ 36 | An enum value that specifies the format this OLE object uses to 37 | transmit data to the host application. 38 | """ 39 | return self.__cf 40 | 41 | @cf.setter 42 | def cf(self, value: ODTCf) -> None: 43 | if not isinstance(value, ODTCf): 44 | raise TypeError(':property cf: MUST be of type ODTCf.') 45 | 46 | self.__cf = value 47 | 48 | @property 49 | def odtPersist1(self) -> ODTPersist1: 50 | """ 51 | Flags that specify information about the OLE object. 52 | """ 53 | return self.__persist1 54 | 55 | @odtPersist1.setter 56 | def odtPersist1(self, value: ODTPersist1) -> None: 57 | if not isinstance(value, ODTPersist1): 58 | raise TypeError(':property odtPersist1: MUST be of type ODTPersist1.') 59 | 60 | self.__persist1 = value 61 | 62 | @property 63 | def odtPersist2(self) -> ODTPersist2: 64 | """ 65 | Flags that specify additional information about the OLE object. 
@final
class OleStreamStruct:
    """
    The OLEStream structure, as specified in [MS-OLEDS].

    Only the variant used for embedded objects is handled, so only the fields
    that variant carries are available.
    """

    def __init__(self, data: Optional[bytes] = None):
        """
        :raises TypeError: The data given is not for an embedded object.
        """
        self.__rms = None
        if data:
            stream = BytesReader(data)
            # The structure must start with the expected version bytes.
            stream.assertRead(b'\x01\x00\x00\x02', 'Ole stream had invalid version (expected {expected}, got {actual}).')
            self.__flags = stream.readUnsignedInt()
            # The lowest flag bit being set means this is a linked object.
            if self.__flags & 1:
                raise TypeError('Cannot parse an OLEStream structure for a linked object.')
            self.__linkUpdateOption = stream.readUnsignedInt()
            stream.assertNull(4, 'Ole stream reserved was not null (got {actual}).')
            monikerSize = stream.readUnsignedInt()
            # A non-zero size counts the 4 byte size field itself as well.
            if monikerSize > 0:
                self.__rms = MonikerStream(stream.read(monikerSize - 4))
        else:
            # No data: start out as an empty structure.
            self.__flags = 0
            self.__linkUpdateOption = 0

    def __bytes__(self) -> bytes:
        return self.toBytes()

    def toBytes(self) -> bytes:
        """
        Packs the structure into bytes.
        """
        if self.__rms is None:
            monikerBytes = b''
        else:
            monikerBytes = bytes(self.__rms)

        # The size field includes its own 4 bytes, unless there is no moniker
        # stream data at all, in which case it is zero.
        if monikerBytes:
            sizeField = len(monikerBytes) + 4
        else:
            sizeField = 0

        return b''.join((
            b'\x01\x00\x00\x02',
            st.ST_LE_UI32.pack(self.__flags),
            st.ST_LE_UI32.pack(self.__linkUpdateOption),
            b'\x00\x00\x00\x00',
            st.ST_LE_UI32.pack(sizeField),
            monikerBytes,
        ))

    @property
    def flags(self) -> int:
        """
        The flags for the OLEStream.

        The bit with mask ``0x00001000`` is an implementation-specific hint
        supplied by the application or by a higher-level protocol that creates
        the data structure. It MAY be ignored on processing. A server
        implementation which does not ignore this bit MAY cache the storage
        when the bit is set.

        :raises TypeError: The property was set to something other than an
            int.
        :raises ValueError: The property was set with a bit other than the
            implementation specific bit set.
        """
        return self.__flags

    @flags.setter
    def flags(self, value: int) -> None:
        if not isinstance(value, int):
            raise TypeError(':property flags: MUST be an int.')
        if value not in (0, 0x1000):
            raise ValueError('Cannot set bits other than the implementation specific one.')

        self.__flags = value

    @property
    def linkUpdateOption(self) -> int:
        """
        An implementation-specific hint.

        This hint MAY be ignored. On Windows, this field contains values from
        the OLEUPDATE enumeration.

        :raises TypeError: The property was set to something other than an
            int.
        :raises ValueError: The property was set to a value that does not fit
            in an unsigned 32-bit integer.
        """
        return self.__linkUpdateOption

    @linkUpdateOption.setter
    def linkUpdateOption(self, value: int) -> None:
        if not isinstance(value, int):
            raise TypeError(':property linkUpdateOption: MUST be an int.')
        if value < 0:
            raise ValueError(':property linkUpdateOption: MUST be positive.')
        if value > 0xFFFFFFFF:
            raise ValueError(':property linkUpdateOption: MUST be less than 0x100000000.')

        self.__linkUpdateOption = value

    @property
    def reservedMonikerStream(self) -> Optional[MonikerStream]:
        """
        A MonikerStream structure that can contain any arbitrary value, or
        ``None`` if there is none.
        """
        return self.__rms

    @reservedMonikerStream.setter
    def reservedMonikerStream(self, data: Optional[MonikerStream]) -> None:
        if not (data is None or isinstance(data, MonikerStream)):
            raise TypeError('Reserved moniker stream must be a MonikerStream instance or None.')

        self.__rms = data
15 | """ 16 | 17 | def __init__(self, data: bytes): 18 | self.__rawData = data 19 | reader = BytesReader(data) 20 | 21 | self.__cookie = reader.assertRead(b'PCDFEB09\x00') 22 | 23 | self.__version = reader.readUnsignedInt() 24 | entrySize = reader.readInt() 25 | if entrySize: 26 | self.__storeEntryID = StoreObjectEntryID(reader.read(entrySize)) 27 | else: 28 | self.__storeEntryID = None 29 | 30 | entrySize = reader.readInt() 31 | if entrySize: 32 | self.__folderEntryID = FolderEntryID(reader.read(entrySize)) 33 | else: 34 | self.__folderEntryID = None 35 | 36 | entrySize = reader.readInt() 37 | if entrySize: 38 | self.__messageEntryID = MessageEntryID(reader.read(entrySize)) 39 | else: 40 | self.__messageEntryID = None 41 | 42 | entrySize = reader.readInt() 43 | if entrySize: 44 | self.__searchFolderEntryID = FolderEntryID(reader.read(entrySize)) 45 | else: 46 | self.__searchFolderEntryID = None 47 | 48 | entrySize = reader.readInt() 49 | if entrySize: 50 | self.__messageSearchKey = reader.read(entrySize) 51 | else: 52 | self.__messageSearchKey = None 53 | 54 | entrySize = reader.readInt() 55 | if entrySize: 56 | self.__ansiText = reader.read(entrySize) 57 | else: 58 | self.__ansiText = None 59 | 60 | def __bytes__(self) -> bytes: 61 | return self.toBytes() 62 | 63 | def toBytes(self) -> bytes: 64 | return self.__rawData 65 | 66 | @property 67 | def ansiText(self) -> Optional[bytes]: 68 | """ 69 | The subject of the original message. 70 | 71 | Set to None if not present. 72 | """ 73 | return self.__ansiText 74 | 75 | @property 76 | def cookie(self) -> bytes: 77 | """ 78 | String used for validation. 79 | 80 | Set to ``b'PCDFEB09\x00'``. 81 | """ 82 | return self.__cookie 83 | 84 | @property 85 | def folderEntryID(self) -> Optional[EntryID]: 86 | """ 87 | The EntryID of the folder than contains the original message. 
class SystemTime:
    """
    A SYSTEMTIME struct, as defined in [MS-DTYP].

    Holds eight fields, each of which the setters restrict to the range
    0 through 0xFFFF.
    """

    def __init__(self, data: Optional[bytes] = None):
        # Missing or empty input is treated as sixteen null bytes.
        self.unpack(data or (b'\x00' * 16))

    def __eq__(self, other: Any) -> bool:
        # Instances are equal when their packed forms are identical.
        return isinstance(other, SystemTime) and bytes(self) == bytes(other)

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)

    def __bytes__(self) -> bytes:
        return self.toBytes()

    @staticmethod
    def __checkWord(label: str, value: int) -> int:
        """
        Returns :param value: unchanged if it lies in 0 through 0xFFFF.

        :raises ValueError: The value is outside of that range. The message
            starts with :param label:.
        """
        if value < 0:
            raise ValueError(f'{label} must be positive.')
        if value > 0xFFFF:
            raise ValueError(f'{label} cannot be greater than 0xFFFF.')
        return value

    def toBytes(self) -> bytes:
        """
        Packs the current field values into bytes.
        """
        fields = (
            self.__year,
            self.__month,
            self.__dayOfWeek,
            self.__day,
            self.__hour,
            self.__minute,
            self.__second,
            self.__milliseconds,
        )
        return constants.st.ST_SYSTEMTIME.pack(*fields)

    def unpack(self, data: bytes) -> None:
        """
        Fills out the fields of this instance by unpacking the bytes.
        """
        # Field order mirrors the order used by toBytes.
        (self.__year,
         self.__month,
         self.__dayOfWeek,
         self.__day,
         self.__hour,
         self.__minute,
         self.__second,
         self.__milliseconds) = constants.st.ST_SYSTEMTIME.unpack(data)

    @property
    def day(self) -> int:
        return self.__day

    @day.setter
    def day(self, value: int) -> None:
        self.__day = self.__checkWord('Day', value)

    @property
    def dayOfWeek(self) -> int:
        return self.__dayOfWeek

    @dayOfWeek.setter
    def dayOfWeek(self, value: int) -> None:
        self.__dayOfWeek = self.__checkWord('Day of week', value)

    @property
    def hour(self) -> int:
        return self.__hour

    @hour.setter
    def hour(self, value: int) -> None:
        self.__hour = self.__checkWord('Hour', value)

    @property
    def milliseconds(self) -> int:
        return self.__milliseconds

    @milliseconds.setter
    def milliseconds(self, value: int) -> None:
        self.__milliseconds = self.__checkWord('Milliseconds', value)

    @property
    def minute(self) -> int:
        return self.__minute

    @minute.setter
    def minute(self, value: int) -> None:
        self.__minute = self.__checkWord('Minute', value)

    @property
    def month(self) -> int:
        return self.__month

    @month.setter
    def month(self, value: int) -> None:
        self.__month = self.__checkWord('Month', value)

    @property
    def second(self) -> int:
        return self.__second

    @second.setter
    def second(self, value: int) -> None:
        self.__second = self.__checkWord('Second', value)

    @property
    def year(self) -> int:
        return self.__year

    @year.setter
    def year(self, value: int) -> None:
        self.__year = self.__checkWord('Year', value)
class TimeZoneDefinition:
    """
    Structure for PidLidAppointmentTimeZoneDefinitionRecur from [MS-OXOCAL].

    Layout handled by this class: major version (1 byte), minor version
    (1 byte), cbHeader (2 bytes), a reserved ``b'\\x02\\x00'`` marker,
    cchKeyName (2 bytes), the key name as UTF-16-LE, cRules (2 bytes), and
    then the rules themselves.
    """

    def __init__(self, data: Optional[bytes] = None):
        """
        :param data: The bytes to parse. If None or empty, a default
            definition (version 2.1, empty key name, one default rule) is
            created instead.

        :raises ValueError: The number of rules is not in the range 1-1024.
        """
        if not data:
            self.__majorVersion = 2
            self.__minorVersion = 1
            self.__keyName = ''
            self.__rules = [TZRule()]
            return

        reader = BytesReader(data)
        self.__majorVersion = reader.readUnsignedByte()
        self.__minorVersion = reader.readUnsignedByte()
        # cbHeader is recomputed from the key name when packing, so the
        # stored value only needs to be skipped here.
        reader.readUnsignedShort()
        reader.assertRead(b'\x02\x00')
        # cchKeyName counts 2-byte UTF-16 code units, not bytes.
        cchKeyName = reader.readUnsignedShort()
        self.__keyName = reader.read(2 * cchKeyName).decode('utf-16-le')
        cRules = reader.readUnsignedShort()
        if cRules < 1 or cRules > 1024:
            raise ValueError('Value for cRules was out of range.')
        self.__rules = [reader.readClass(TZRule) for _ in range(cRules)]

    def __bytes__(self) -> bytes:
        return self.toBytes()

    def toBytes(self) -> bytes:
        """
        Packs the definition into bytes that :meth:`__init__` can parse.

        :raises ValueError: The definition has no rules or more than 1024.
        """
        # Validate some of the data.
        if len(self.__rules) < 1:
            raise ValueError('Cannot pack a TimeZoneDefinition with no rules.')
        if len(self.__rules) > 1024:
            raise ValueError('TimeZoneDefintion can only have up to 1024 rules.')

        # Encode first so that the counts are based on UTF-16 code units
        # rather than on Python characters.
        keyName = self.__keyName.encode('utf-16-le')

        ret = bytes((self.__majorVersion, self.__minorVersion))
        # cbHeader covers the reserved marker, cchKeyName, the key name, and
        # cRules: three 2-byte fields plus the encoded name.
        ret += st.ST_LE_UI16.pack(6 + len(keyName))
        ret += b'\x02\x00'
        ret += st.ST_LE_UI16.pack(len(keyName) // 2)
        ret += keyName
        ret += st.ST_LE_UI16.pack(len(self.__rules))
        ret += b''.join(bytes(x) for x in self.__rules)

        return ret

    @property
    def keyName(self) -> str:
        """
        The name of the associated time zone.

        Not localized but instead set to the unique name of the desired time
        zone.
        """
        return self.__keyName

    @keyName.setter
    def keyName(self, value: str) -> None:
        value = str(value)
        if len(value) > 260:
            raise ValueError('Key name must be a string less than 261 characters.')

        self.__keyName = value

    @property
    def majorVersion(self) -> int:
        """
        The major version.
        """
        return self.__majorVersion

    @majorVersion.setter
    def majorVersion(self, value: int) -> None:
        if value > 255:
            raise ValueError('Major version cannot be greater than 255')
        if value < 0:
            raise ValueError('Major version must be positive.')

        self.__majorVersion = value

    @property
    def minorVersion(self) -> int:
        """
        The minor version.
        """
        return self.__minorVersion

    @minorVersion.setter
    def minorVersion(self, value: int) -> None:
        if value > 255:
            raise ValueError('Minor version cannot be greater than 255')
        if value < 0:
            raise ValueError('Minor version must be positive.')

        self.__minorVersion = value

    @property
    def rules(self) -> List[TZRule]:
        """
        The list of TZRule structures that specifies a time zone.
        """
        return self.__rules
class TimeZoneStruct:
    """
    A TimeZoneStruct, as specified in [MS-OXOCAL].
    """

    def __init__(self, data: Optional[bytes] = None):
        if data:
            fields = constants.st.ST_TZ.unpack(data)
            self.__bias, self.__standardBias, self.__daylightBias = fields[:3]
            # Indexes 3 and 5 hold the year values. They are not stored, as
            # toBytes takes them from the two dates instead.
            self.__standardDate = SystemTime(fields[4])
            self.__daylightDate = SystemTime(fields[6])
        else:
            # No data: every bias is zero and both dates are default
            # SystemTime instances.
            self.__bias = 0
            self.__standardBias = 0
            self.__daylightBias = 0
            self.__standardDate = SystemTime()
            self.__daylightDate = SystemTime()

    def __bytes__(self) -> bytes:
        return self.toBytes()

    def toBytes(self) -> bytes:
        """
        Packs the current field values into bytes.
        """
        standard = self.__standardDate
        daylight = self.__daylightDate
        return constants.st.ST_TZ.pack(
            self.__bias,
            self.__standardBias,
            self.__daylightBias,
            self.standardYear,
            bytes(standard),
            self.daylightYear,
            bytes(daylight),
        )

    @property
    def bias(self) -> int:
        """
        The time zone's offset in minutes from UTC.
        """
        return self.__bias

    @property
    def daylightBias(self) -> int:
        """
        The offset in minutes from the value of the bias field during daylight
        saving time.
        """
        return self.__daylightBias

    @property
    def daylightDate(self) -> SystemTime:
        """
        The date and local time that indicate when to begin using the value
        specified in the daylightBias field. Uses the same format as
        standardDate.
        """
        return self.__daylightDate

    @property
    def daylightYear(self) -> int:
        """
        The year of the daylightDate field.
        """
        return self.__daylightDate.year

    @property
    def standardBias(self) -> int:
        """
        The offset in minutes from the value of the bias field during standard
        time.
        """
        return self.__standardBias

    @property
    def standardDate(self) -> SystemTime:
        """
        The date and local time that indicate when to begin using the value
        specified in the standardBias field. If the time zone does not support
        daylight saving time, the month member must be 0. If the year is not
        0, then it is an absolute date that only occurs once, otherwise it is
        a relative date that occurs yearly.

        See [MS-OXOCAL] for details.
        """
        return self.__standardDate

    @property
    def standardYear(self) -> int:
        """
        The year of the standardDate field.
        """
        return self.__standardDate.year
class TOCEntry:
    """
    A single table-of-contents entry describing presentation data.

    Layout handled by this class: a ClipboardFormatOrAnsiString, the target
    device size (4 bytes), aspect (4 bytes), lindex (4 bytes), tymed
    (4 bytes), 12 reserved bytes, advf (4 bytes), 4 reserved bytes, and then
    the target device itself.
    """

    def __init__(self, reader: Optional[Union[bytes, BytesReader]] = None):
        """
        :param reader: The bytes, or a reader positioned at the start of the
            entry, to parse. If None, an empty entry is created.
        """
        if reader is None:
            self.__clipFormat = ClipboardFormatOrAnsiString()
            self.__aspect = 0
            self.__lindex = 0
            self.__tymed = 0
            self.__advf = 0
            self.__targetDevice = DVTargetDevice(None)
            return

        if isinstance(reader, bytes):
            reader = BytesReader(reader)

        self.__clipFormat = ClipboardFormatOrAnsiString(reader)
        targetDeviceSize = reader.readUnsignedInt()
        self.__aspect = reader.readUnsignedInt()
        self.__lindex = reader.readUnsignedInt()
        self.__tymed = reader.readUnsignedInt()
        # Reserved1: ignored on read, written as nulls by toBytes.
        reader.read(12)
        self.__advf = reader.readUnsignedInt()
        # Reserved2: toBytes writes these 4 bytes, so they must be skipped
        # here or the target device would be read from the wrong offset.
        reader.read(4)

        # Based off the wording of the documentation, it seems like this can't
        # actually be 0 bytes, so this should be fine.
        self.__targetDevice = DVTargetDevice(reader.read(targetDeviceSize))

    def __bytes__(self) -> bytes:
        return self.toBytes()

    @staticmethod
    def __checkUInt32(name: str, val: int) -> int:
        """
        Returns :param val: unchanged if it is an int that fits in 32 unsigned
        bits.

        :param name: The property name to use in the error message.

        :raises TypeError: The value is not an int.
        :raises ValueError: The value is negative or above 0xFFFFFFFF.
        """
        if not isinstance(val, int):
            raise TypeError(f':property {name}: must be an int.')
        if val < 0:
            raise ValueError(f':property {name}: must be positive.')
        if val > 0xFFFFFFFF:
            raise ValueError(f':property {name}: cannot be greater than 0xFFFFFFFF.')

        return val

    def toBytes(self) -> bytes:
        """
        Packs the entry into bytes, writing both reserved sections as nulls.
        """
        ret = bytes(self.__clipFormat)
        td = bytes(self.__targetDevice)
        ret += st.ST_LE_UI32.pack(len(td))
        ret += st.ST_LE_UI32.pack(self.__aspect)
        ret += st.ST_LE_UI32.pack(self.__lindex)
        ret += st.ST_LE_UI32.pack(self.__tymed)
        # Reserved1.
        ret += b'\x00' * 12
        ret += st.ST_LE_UI32.pack(self.__advf)
        # Reserved2.
        ret += b'\x00' * 4
        ret += td

        return ret

    @property
    def advf(self) -> Union[int, ADVF]:
        """
        An implementation specific hint on how to render the presentation data
        on screen. May be ignored on processing.
        """
        return self.__advf

    @advf.setter
    def advf(self, val: Union[int, ADVF]) -> None:
        self.__advf = self.__checkUInt32('advf', val)

    @property
    def ansiClipboardFormat(self) -> ClipboardFormatOrAnsiString:
        """
        The clipboard format structure found at the start of the entry.
        """
        return self.__clipFormat

    @property
    def aspect(self) -> Union[int, DVAspect]:
        """
        An implementation specific hint on how to render the presentation data
        on screen. May be ignored on processing.
        """
        return self.__aspect

    @aspect.setter
    def aspect(self, val: Union[int, DVAspect]) -> None:
        self.__aspect = self.__checkUInt32('aspect', val)

    @property
    def lindex(self) -> int:
        """
        An implementation specific hint on how to render the presentation data
        on screen. May be ignored on processing.
        """
        return self.__lindex

    @lindex.setter
    def lindex(self, val: int) -> None:
        self.__lindex = self.__checkUInt32('lindex', val)

    @property
    def targetDevice(self) -> DVTargetDevice:
        """
        The target device structure found at the end of the entry.
        """
        return self.__targetDevice

    @property
    def tymed(self) -> int:
        """
        The raw value of the tymed field.
        """
        return self.__tymed

    @tymed.setter
    def tymed(self, val: int) -> None:
        self.__tymed = self.__checkUInt32('tymed', val)
@final
class TZRule:
    """
    A TZRule structure, as defined in [MS-OXOCAL].

    Layout handled by this class: major version (1 byte), minor version
    (1 byte), the reserved marker ``b'\\x3E\\x00'``, flags (2 bytes), year
    (2 bytes), 14 reserved bytes, three signed 4-byte biases, and two 16-byte
    SYSTEMTIME blocks, for a total of 66 bytes.
    """

    __SIZE__: int = 66
    # The explicit "<" is required. Without a byte order prefix, struct uses
    # native alignment and inserts padding before the 3i group, producing 68
    # bytes instead of the 66 that __init__ reads.
    __struct: Final[Struct] = Struct('<4B2H14x3i16s16s')

    def __init__(self, data: Optional[bytes] = None):
        """
        :param data: The bytes to parse. If None or empty, a default rule
            (version 2.1, no flags, all values zero) is created instead.
        """
        if not data:
            self.__majorVersion = 2
            self.__minorVersion = 1
            self.__flags = TZFlag(0)
            self.__year = 0
            self.__bias = 0
            self.__standardBias = 0
            self.__daylightBias = 0
            self.__standardDate = SystemTime()
            self.__daylightDate = SystemTime()
            return

        reader = BytesReader(data)
        self.__majorVersion = reader.readUnsignedByte()
        self.__minorVersion = reader.readUnsignedByte()
        reader.assertRead(b'\x3E\x00')
        self.__flags = TZFlag(reader.readUnsignedShort())
        self.__year = reader.readUnsignedShort()
        # This *MUST* be null, however I've seen Outlook not follow that. Simply
        # log a warning about it even though it's a violation.
        if any(b := reader.read(14)):
            logger.warning(f'Read TZRule with non-null X section (got {b}).')
        self.__bias = reader.readInt()
        self.__standardBias = reader.readInt()
        self.__daylightBias = reader.readInt()
        self.__standardDate = SystemTime(reader.read(16))
        self.__daylightDate = SystemTime(reader.read(16))

    def __bytes__(self) -> bytes:
        return self.toBytes()

    def toBytes(self) -> bytes:
        """
        Packs the rule into its 66 byte form.

        The 14 reserved bytes are always written as nulls, even if the data
        this rule was read from had other values there.
        """
        return self.__struct.pack(self.__majorVersion,
                                  self.__minorVersion,
                                  # The reserved marker, b'\x3E\x00'.
                                  62,
                                  0,
                                  self.__flags,
                                  self.__year,
                                  self.__bias,
                                  self.__standardBias,
                                  self.__daylightBias,
                                  bytes(self.__standardDate),
                                  bytes(self.__daylightDate))

    @property
    def bias(self) -> int:
        """
        The time zone's offset in minutes from UTC.
        """
        return self.__bias

    @property
    def daylightBias(self) -> int:
        """
        The offset in minutes from the value of the bias field during daylight
        saving time.
        """
        return self.__daylightBias

    @property
    def daylightDate(self) -> SystemTime:
        """
        The date and local time that indicate when to begin using the value
        specified in the daylightBias field. Uses the same format as
        standardDate.
        """
        return self.__daylightDate

    @property
    def flags(self) -> TZFlag:
        """
        The flags for this rule.
        """
        return self.__flags

    @property
    def majorVersion(self) -> int:
        """
        The major version.
        """
        return self.__majorVersion

    @property
    def minorVersion(self) -> int:
        """
        The minor version.
        """
        return self.__minorVersion

    @property
    def standardBias(self) -> int:
        """
        The offset in minutes from the value of the bias field during standard
        time.
        """
        return self.__standardBias

    @property
    def standardDate(self) -> SystemTime:
        """
        The date and local time that indicate when to begin using the value
        specified in the standardBias field. If the time zone does not support
        daylight saving time, the month member must be 0. If the year is not
        0, then it is an absolute date that only occurs once, otherwise it is
        a relative date that occurs yearly.

        See [MS-OXOCAL] for details.
        """
        return self.__standardDate

    @property
    def year(self) -> int:
        """
        The year this rule is scheduled to take place. A rule will remain in
        effect from January 1 of its year until January 1 of the next rule's
        year field. If no rules exist for subsequent years, this rule will
        remain in effect indefinitely.
        """
        return self.__year
class AttachmentTests(unittest.TestCase):
    """
    Tests for the attachment classes, all run against ``unicode.msg``.
    """

    def _checkPlaceholderType(self, attClass, expectedType):
        """
        Shared body for the attachment classes that carry no data.

        Forces every attachment to be created as :param attClass:, then
        checks that each one reports :param expectedType:, has no data, and
        refuses to save or produce a filename.
        """
        factory = _forceAttachmentType(attClass)
        with openMsg(TEST_FILE_DIR / 'unicode.msg', initAttachment = factory) as msg:
            self.assertEqual(len(msg.attachments), 2)

            for att in msg.attachments:
                self.assertIsInstance(att, attClass)
                self.assertIs(att.type, expectedType)
                self.assertIsNone(att.data)
                with self.assertRaises(NotImplementedError):
                    att.save()
                skipped = att.save(skipNotImplemented = True)
                self.assertEqual(skipped, (enums.SaveType.NONE, None))
                with self.assertRaises(NotImplementedError):
                    att.getFilename()

    def testBroken(self):
        self._checkPlaceholderType(BrokenAttachment, enums.AttachmentType.BROKEN)

    def testNormal(self):
        # Just covers a bit of the attachment class.
        # Each entry is the name of the lookup method and the stream to
        # check with it, in the order the checks should run.
        expectedStreams = (
            ('exists', '__properties_version1.0'),
            ('exists', '__substg1.0_0FF90102'),
            ('exists', '__substg1.0_37010102'),
            ('sExists', '__substg1.0_3704'),
            ('sExists', '__substg1.0_3707'),
            ('exists', '__substg1.0_370A0102'),
            ('sExists', '__substg1.0_370E'),
        )

        with openMsg(TEST_FILE_DIR / 'unicode.msg') as msg:
            self.assertEqual(len(msg.attachments), 2)

            for att in msg.attachments:
                self.assertIsInstance(att, Attachment)
                self.assertIs(att.type, enums.AttachmentType.DATA)
                for methodName, streamName in expectedStreams:
                    self.assertTrue(getattr(att, methodName)(streamName))

                self.assertIsNotNone(att.getFilename())
                self.assertIsNone(att.attachmentEncoding)
                self.assertIsNone(att.additionalInformation)
                self.assertIsNone(att.cid)
                self.assertIs(att.contentId, att.cid)
                self.assertEqual(att.mimetype, 'image/tiff')
                self.assertIs(att.msg, msg)

                # The tree path is made of weak references leading from the
                # top level file down to the attachment.
                weakRefList = att.treePath
                self.assertEqual(len(weakRefList), 2)
                for ref, target in zip(weakRefList, (msg, att)):
                    self.assertIs(ref(), target)

                self.assertIsNotNone(att.data)
                self.assertIs(att.dataType, bytes)
                self.assertIsInstance(att.data, bytes)

    def testUnsupported(self):
        self._checkPlaceholderType(UnsupportedAttachment, enums.AttachmentType.UNSUPPORTED)


def _forceAttachmentType(attType: Type[_T]) -> Callable[[MSGFile, str], _T]:
    """
    Returns an attachment initializer that always creates :param attType:.

    The returned function loads the properties stream from the attachment's
    directory and hands it to the class along with the msg file and
    directory.
    """
    def createAttachment(msg: MSGFile, dir_: str) -> _T:
        rawProps = msg.getStream([dir_, '__properties_version1.0'])
        store = PropertiesStore(rawProps, enums.PropertiesType.ATTACHMENT)
        return attType(msg, dir_, store)

    return createAttachment
class CommandLineTests(unittest.TestCase):
    """
    Tests that run the module through its command line interface.
    """

    def testStdin(self, testFileDir = TEST_FILE_DIR):
        """
        Checks that reading a file from stdin gives the same output as
        reading it from the disk.
        """
        baseCommand = [sys.executable, '-m', 'extract_msg']

        def runModule(extraArgs, payload = None):
            """
            Runs the module with the arguments, feeding :param payload: to
            stdin, and returns the (stdout, stderr) pair.
            """
            process = subprocess.Popen(baseCommand + extraArgs,
                                       stdin = subprocess.PIPE,
                                       stdout = subprocess.PIPE,
                                       stderr = subprocess.PIPE)
            # Wait for the process to return data.
            return process.communicate(payload)

        for path in testFileDir.glob('*.msg'):
            # First, let's do the file on the disk.
            fromDisk = runModule(['--dump-stdout', str(path)])

            # Now, do the same thing with stdin.
            with open(path, 'rb') as f:
                fromStdin = runModule(['-s', '--dump-stdout'], f.read())

            # Now, compare the two.
            with self.subTest(path):
                self.assertEqual(fromDisk[0], fromStdin[0])
                self.assertEqual(fromDisk[1], fromStdin[1])

    @unittest.skipIf(USER_TEST_DIR is None, 'User test files not defined.')
    def testUserStdin(self):
        self.testStdin(USER_TEST_DIR)
class UtilTests(unittest.TestCase):
    """
    Tests for the functions in the utils submodule.
    """

    def test_dictGetCasedKey(self):
        caseDict = {'hello': 1, 'HeUtQjWkW': 2}

        # Lookups ignore case and give back the key exactly as stored.
        for query, storedKey in (('Hello', 'hello'), ('heutqjwkw', 'HeUtQjWkW')):
            self.assertEqual(utils.dictGetCasedKey(caseDict, query), storedKey)
        with self.assertRaises(KeyError):
            utils.dictGetCasedKey(caseDict, 'jjjjj')

    def test_divide(self):
        inputString = '12345678901234567890'
        # Maps the chunk size to the expected pieces, written out with spaces
        # between them.
        expectedPieces = {
            1: '1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0',
            2: '12 34 56 78 90 12 34 56 78 90',
            3: '123 456 789 012 345 678 90',
            4: '1234 5678 9012 3456 7890',
            5: '12345 67890 12345 67890',
            6: '123456 789012 345678 90',
            7: '1234567 8901234 567890',
            8: '12345678 90123456 7890',
            9: '123456789 012345678 90',
            10: '1234567890 1234567890',
            11: '12345678901 234567890',
        }

        for divideBy, pieces in expectedPieces.items():
            self.assertListEqual(utils.divide(inputString, divideBy), pieces.split(' '))

    def test_makeWeakRef(self):
        self.assertIsNone(utils.makeWeakRef(None))

        class TestClass:
            pass

        self.assertIsNotNone(utils.makeWeakRef(TestClass()))

    def test_minutesToDurationStr(self):
        expectedStrings = (
            (0, '0 hours'),
            (1, '1 minute'),
            (2, '2 minutes'),
            (59, '59 minutes'),
            (60, '1 hour'),
            (61, '1 hour 1 minute'),
            (62, '1 hour 2 minutes'),
            (120, '2 hours'),
            (121, '2 hours 1 minute'),
            (122, '2 hours 2 minutes'),
        )

        for minutes, text in expectedStrings:
            self.assertEqual(utils.minutesToDurationStr(minutes), text)

    def test_msgPathToStr(self):
        # Every form of the path should come out with forward slashes only.
        equivalentPaths = (
            'hello/world/one',
            'hello/world\\one',
            ['hello', 'world', 'one'],
            ['hello\\world', 'one'],
        )

        for path in equivalentPaths:
            self.assertEqual(utils.msgPathToString(path), 'hello/world/one')
unittest 10 | 11 | 12 | class ValidationTests(unittest.TestCase): 13 | """ 14 | Tests to perform basic validation on parts of the module. 15 | """ 16 | 17 | def testConstants(self): 18 | """ 19 | Tests to validate the constants submodule. 20 | """ 21 | from extract_msg import constants 22 | 23 | # Validate the structs section. 24 | # Check for exports that are not structs. 25 | for name in constants.st.__all__: 26 | with self.subTest(f'Check that {name} is an instance of Struct.'): 27 | self.assertIsInstance(getattr(constants.st, name), struct.Struct) 28 | 29 | # Check for structs that didn't get exported. 30 | for name in dir(constants.st): 31 | if isinstance(getattr(constants.st, name), struct.Struct): 32 | with self.subTest(f'Ensure {name} is exported.'): 33 | self.assertIn(name, constants.st.__all__) 34 | 35 | # Ensure names are all uppercase. 36 | for name in constants.st.__all__: 37 | with self.subTest(f'Ensure {name} is a valid constant name.'): 38 | self.assertTrue(name.isupper()) 39 | 40 | # Validate the regular expressions section. 41 | # Check for exports that are not regular expressions. 42 | for name in constants.re.__all__: 43 | with self.subTest(f'Check that {name} is an instance of Pattern.'): 44 | self.assertIsInstance(getattr(constants.re, name), re.Pattern) 45 | 46 | # Check for regular expressions that didn't get exported. 47 | for name in dir(constants.re): 48 | if isinstance(getattr(constants.re, name), re.Pattern): 49 | with self.subTest(f'Ensure {name} is exported.'): 50 | self.assertIn(name, constants.re.__all__) 51 | 52 | # Ensure names are all uppercase. 53 | for name in constants.re.__all__: 54 | with self.subTest(f'Ensure {name} is a valid constant name.'): 55 | self.assertTrue(name.isupper()) 56 | 57 | # The PropertiesSet section. 58 | # Ensure names are all uppercase. 
59 | for name in constants.ps.__all__: 60 | with self.subTest(f'Ensure {name} is a valid constant name.'): 61 | self.assertTrue(name.isupper()) 62 | 63 | # Basic validation of the normal constants section. We have to do 64 | # things a little differently since the submodule are lowercase. 65 | # Ensure names are all uppercase. 66 | for name in constants.__all__: 67 | with self.subTest(f'Ensure {name} is a valid constant name.'): 68 | if not isinstance(getattr(constants, name), type(constants)): 69 | self.assertTrue(name.isupper()) 70 | 71 | def testEnums(self): 72 | """ 73 | Tests to validate the enums submodule. 74 | """ 75 | # First test, make sure everything in enums is actually an enum. Only 76 | # test the actual exports. 77 | from extract_msg import enums 78 | for name in enums.__all__: 79 | with self.subTest(f'Check that {name} is a subclass of Enum.'): 80 | self.assertTrue(issubclass(getattr(enums, name), enum.Enum)) 81 | 82 | # Check for enums that didn't get exported. 83 | for name in dir(enums): 84 | if isinstance(getattr(enums, name), enum.Enum): 85 | with self.subTest(f'Ensure {name} is exported.'): 86 | self.assertIn(name, enums.__all__) -------------------------------------------------------------------------------- /helper-scripts/README.md: -------------------------------------------------------------------------------- 1 | Just a collection of helper scripts for the development of the module. 2 | 3 | detect-prop-overlap.py: A script to detect properties in a file that are using the same name or property ID. Looks for instances of `_ensureSetX` to find conflicts and outputs a list of anything that is duplicated. Takes in the path to a file. 4 | -------------------------------------------------------------------------------- /helper-scripts/detect-prop-overlap.py.old: -------------------------------------------------------------------------------- 1 | #! 
import glob
import re
import sys


def main(args):
    """
    Checks the specified file(s) for overlapping properties. Properties are
    overlapping if they share the same variable name or come from the same
    property. This may be intentional for some properties.

    :param args: The full argument vector. Every entry after the first is
        used as a glob pattern naming the files to scan.

    :raises Exception: Always. The script is disabled until it is rewritten
        for the new property system, so nothing below the raise runs.
    """
    # Imported locally so the script's top-level imports stay untouched.
    from collections import Counter

    raise Exception('This script needs to be rewritten for the new property system.')

    if len(args) < 2:
        print('Please specify a file to read.')
        sys.exit(1)

    pattern = re.compile(r"(?<=self._get)((Named)|(Property)|(Typed)|(Stream))As\('(.*?)'")

    for patt in args[1:]:
        for name in glob.glob(patt):
            with open(name, 'r', encoding = 'utf-8') as f:
                data = f.read()

            # Group 6 is the quoted first argument of each matched call.
            # Anything seen more than once is a duplicate. Sorting keeps the
            # report in a stable, alphabetical order.
            counts = Counter(x.group(6) for x in pattern.finditer(data))
            duplicateIds = sorted(x for x, count in counts.items() if count > 1)

            print(name)

            if duplicateIds:
                print('\tIDs:')
                for x in duplicateIds:
                    print(f'\t\t{x}')
            else:
                print('\tNo duplicates detected.')

            print()


if __name__ == '__main__':
    main(sys.argv)
import os
import sys


# Maps every byte value (0-255) to the source text of a Python string literal
# for the character with that code point. Values below 0x80 are exactly what
# ``repr`` produces (including the quote switching for ' and the escapes for
# control characters). Values from 0x80 up are always written as an uppercase
# ``\xNN`` escape so the generated file stays pure ASCII.
conversionDict = {
    code: repr(chr(code)) if code < 0x80 else f"'\\x{code:02X}'"
    for code in range(256)
}


def symbolToEntry(input: int, output: int) -> str:
    """
    Converts a single mapping into the text of one dictionary entry.

    :param input: The byte value being decoded. Written as the entry's key.
    :param output: The code point the byte decodes to. Written as a string
        literal, using ``conversionDict`` below 256, a ``\\uXXXX`` escape
        below 0x10000, and a ``\\UXXXXXXXX`` escape otherwise.

    :returns: Text of the form ``key:'literal'``.
    """
    if output < 256:
        return f'{input}:{conversionDict[output]}'
    if output < 0x10000:
        return f'{input}:\'\\u{output:04X}\''
    return f'{input}:\'\\U{output:08X}\''


def _parseTable(lines):
    """
    Builds the byte -> code point table from the lines of a mapping document.

    Lines whose first non-blank character is ``#`` are comments. Every other
    line is split on tabs, and the first two columns are read as base-16
    integers. Lines with no second column, or with a blank one, are skipped.
    """
    table = {}
    for line in lines:
        # Skip comment lines.
        if line.strip().startswith('#'):
            continue
        items = line.split('\t')
        # A whitespace-only second column carries no mapping and would
        # otherwise make int() raise.
        if len(items) > 1 and items[1].strip():
            table[int(items[0], 16)] = int(items[1], 16)
    return table


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print('Invalid number of arguments (requires exactly one file name)!')
        sys.exit(-1)

    with open(sys.argv[1], 'r') as f:
        table = _parseTable(f)

    # Swap only the final extension for ".py". splitext leaves directory
    # names containing dots alone and copes with names that have no
    # extension at all.
    with open(os.path.splitext(sys.argv[1])[0] + '.py', 'w') as f:
        # Write the top of the file.
        f.write('__all__ = [\n \'decodingTable\',\n]\n\n\ndecodingTable={')
        f.write(','.join(symbolToEntry(key, value) for key, value in table.items()))
        f.write('}')
50% would be 132 5 | pixels (calculating the square root of half the card size, that is 6 | pretty close. Square root is ~137 pixels). Multiply the percentage 7 | integer by 264 and divide by 100 and we will call it good enough. 8 | 9 | The scale defined for an image is actually for the area it will be occupying, 10 | not the image itself. 50% means that 50% of the image area is being used. 11 | Here is a full list of how the options behave: 12 | Image left: Image area is left %. Right border is 6-7 pixels. 13 | Fit to Edge: Scales it vertically to match 14 | Top Left: Scales horizontally to match. Places in top left. 15 | Top Center: Scales horizontally to match. Places in top center. 16 | Top Right: Scales horizontally to match. Places in top right. 17 | Center Left: Scales horizontally to match. Places in center left. 18 | Center: Scales horizontally to match. Places in center. 19 | Center Right: Scales horizontally to match. Places in center right. 20 | Bottom Left: Scales horizontally to match. Places in bottom left. 21 | Bottom Center: Scales horizontally to match. Places in bottom center. 22 | Bottom Right: Scales horizontally to match. Places in bottom right. 23 | Image Right: Image area is right %. Left border is 6-7 pixels. 24 | Fit to Edge: Scales it vertically to match 25 | Top Left: 26 | Top Center: 27 | Top Right: 28 | Center Left: 29 | Center: 30 | Center Right: 31 | Bottom Left: 32 | Bottom Center: 33 | Bottom Right: 34 | Image Top: Image area is top %. Bottom border is 6-7 pixels. Top border is none. 35 | Fit to Edge: Scales it horizontally to match. 36 | Top Left: 37 | Top Center: 38 | Top Right: 39 | Center Left: 40 | Center: 41 | Center Right: 42 | Bottom Left: 43 | Bottom Center: 44 | Bottom Right: 45 | Image Bottom: Image area is bottom %. Top border is 6-7 pixels. Bottom border is 9 pixels. 46 | Fit to Edge: Scales it horizontally to match. 
47 | Top Left: 48 | Top Center: 49 | Top Right: 50 | Center Left: 51 | Center: 52 | Center Right: 53 | Bottom Left: 54 | Bottom Center: 55 | Bottom Right: 56 | Text Only: No image. 57 | Background Image: 58 | Fit to Edge: Scales it horizontally to match. 59 | Top Left: Seems to want to scale it to leave 8 pixel border, but may not. Places to left. 60 | Top Center: Seems to want to scale it to leave 8 pixel border, but may not. Places in center. 61 | Top Right: Seems to want to scale it to leave 8 pixel border, but may not. Places to right. 62 | Center Left: Same as top left. 63 | Center: Same as top center. 64 | Center Right: Same as top right. 65 | Bottom Left: Same as top left. 66 | Bottom Center: Same as top center. 67 | Bottom Right: Same as top right. 68 | 69 | 70 | Text: 71 | For the text, the "align" is 8 blank pixels on the side you are aligning to. So 72 | the edge of the text will be 8 pixels away from the edge of the canvas. The top 73 | line of text is 10 pixels down from the top, however the way it aligns text from 74 | the top is unclear. More research needed. If the line is going to go off the 75 | page, it does truncate with a "..." and will go all the way to the very edge. 76 | Text on a line below existing rendered line will not be rendered if it does not 77 | entirely fit on the page vertically. 78 | 79 | Additionally, font size may slightly increase distance to border. However, it 80 | never goes below 8. 
81 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "pythonVersion": "3.8", 3 | "pythonPlatform": "All", 4 | "reportUnnecessaryIsInstance": "information", 5 | "reportConstantRedefinition": "error", 6 | "reportDeprecated": "warning" 7 | } -------------------------------------------------------------------------------- /readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.8" 7 | 8 | sphinx: 9 | configuration: docs/conf.py 10 | 11 | python: 12 | install: 13 | - method: pip 14 | extra_requirements: 15 | - readthedocs 16 | path: . -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | # First level requirements 3 | # Modules using == are *intentional* and must be checked manually before 4 | # upgrading. 5 | olefile==0.47 6 | tzlocal>=4.2,<6 7 | compressed-rtf>=1.0.6,<2 8 | ebcdic>=1.1.1,<2 9 | beautifulsoup4>=4.11.1,<4.14 10 | RTFDE>=0.1.1,<0.2 11 | red-black-tree-mod>=1.20, <=1.23 12 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | python-tag=py3 3 | 4 | [options.extras_require] 5 | all = 6 | extract-msg[mime] 7 | extract-msg[image] 8 | extract-msg[encoding] 9 | mime = 10 | python-magic>=0.4.27,<0.5 11 | image = 12 | Pillow>=9.5.0,<10 13 | encoding = 14 | chardet>=3.0.0,<6 # This can probably be unbound. 
import os
import re

from setuptools import setup


# A handful of variables that are used a couple of times.
github_url = 'https://github.com/TeamMsgExtractor/msg-extractor'
main_module = 'extract_msg'

# Resolve every file relative to this script so the build does not depend on
# the current working directory.
here = os.path.dirname(os.path.abspath(__file__))

# Read in the description from README.
with open(os.path.join(here, 'README.rst'), 'rb') as stream:
    long_description = stream.read().decode('utf-8').replace('\r', '')

# Get the version string this way to avoid issues with modules not being
# installed before setup.
version_re = re.compile("__version__ = '(?P<version>[0-9\\.]*)'")
with open(os.path.join(here, main_module, '__init__.py'), 'r') as stream:
    match = version_re.search(stream.read())
if match is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError('Unable to find __version__ in extract_msg/__init__.py')
version = match.group('version')

# Read in the dependencies from the virtualenv requirements file.
dependencies = []
with open(os.path.join(here, 'requirements.txt'), 'r') as stream:
    for line in stream:
        # Drop inline comments, then the whitespace left in front of them.
        package = line.split('#')[0].strip()
        if package:
            dependencies.append(package)

setup(
    name=main_module,
    version=version,
    description="Extracts emails and attachments saved in Microsoft Outlook's .msg files",
    long_description=long_description,
    long_description_content_type='text/x-rst',
    url=github_url,
    download_url='%s/archives/master' % github_url,
    author='Destiny Peterson & Matthew Walker',
    author_email='arceusthe@gmail.com, mattgwwalker@gmail.com',
    license='GPL',
    # NOTE(review): only the top-level package is listed here; confirm that
    # the subpackages are picked up through include_package_data.
    packages=[main_module],
    py_modules=[main_module],
    entry_points={
        'console_scripts': ['extract_msg = extract_msg.__main__:main',]
    },
    include_package_data=True,
    install_requires=dependencies,
    python_requires='>=3.8',
)
"backupCount": 20, 32 | "encoding": "utf8" 33 | }, 34 | "warning_file_handler": { 35 | "class": "logging.handlers.RotatingFileHandler", 36 | "level": "WARNING", 37 | "formatter": "simple", 38 | "filename": "%LOCALAPPDATA%/extract_msg/extract_msg.log", 39 | "maxBytes": 10485760, 40 | "backupCount": 20, 41 | "encoding": "utf8" 42 | } 43 | }, 44 | "loggers": { 45 | "my_module": { 46 | "level": "ERROR", 47 | "handlers": ["console"], 48 | "propagate": "no" 49 | } 50 | }, 51 | "root": { 52 | "level": "DEBUG", 53 | "handlers": ["console", "info_file_handler", "error_file_handler", "warning_file_handler"] 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /templates/logging-posix.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "disable_existing_loggers": false, 4 | "formatters": { 5 | "simple": { 6 | "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 7 | } 8 | }, 9 | "handlers": { 10 | "console": { 11 | "class": "logging.StreamHandler", 12 | "level": "DEBUG", 13 | "formatter": "simple", 14 | "stream": "ext://sys.stdout" 15 | }, 16 | "info_file_handler": { 17 | "class": "logging.handlers.RotatingFileHandler", 18 | "level": "INFO", 19 | "formatter": "simple", 20 | "filename": "/var/log/extract_msg/extract_msg.log", 21 | "maxBytes": 10485760, 22 | "backupCount": 20, 23 | "encoding": "utf8" 24 | }, 25 | "error_file_handler": { 26 | "class": "logging.handlers.RotatingFileHandler", 27 | "level": "ERROR", 28 | "formatter": "simple", 29 | "filename": "/var/log/extract_msg/extract_msg.log", 30 | "maxBytes": 10485760, 31 | "backupCount": 20, 32 | "encoding": "utf8" 33 | }, 34 | "warning_file_handler": { 35 | "class": "logging.handlers.RotatingFileHandler", 36 | "level": "WARNING", 37 | "formatter": "simple", 38 | "filename": "/var/log/extract_msg/extract_msg.log", 39 | "maxBytes": 10485760, 40 | "backupCount": 20, 41 | "encoding": "utf8" 42 | } 43 | }, 44 
| "loggers": { 45 | "my_module": { 46 | "level": "ERROR", 47 | "handlers": ["console"], 48 | "propagate": "no" 49 | } 50 | }, 51 | "root": { 52 | "level": "DEBUG", 53 | "handlers": ["console", "info_file_handler", "error_file_handler", "warning_file_handler"] 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | The tests for various parts of extract_msg. Tests can be run on user files by 3 | defining the EXTRACT_MSG_TEST_DIR environment variable and setting it to the 4 | folder with your own tests files. It must match the folder structure for the 5 | specific test for that test to run. 6 | """ 7 | import unittest 8 | 9 | from extract_msg_tests import * 10 | 11 | 12 | if __name__ == '__main__': 13 | unittest.main(verbosity = 2) 14 | --------------------------------------------------------------------------------