├── .bumpversion.cfg ├── .github └── workflows │ ├── checks.yml │ ├── publish.yml │ └── tests.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── pyproject.toml ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── files │ ├── NonID3.mp3 │ ├── foo.exe │ ├── foo.gif │ ├── foo.html │ ├── foo.mp3 │ ├── foo.mp4 │ ├── foo.pdf │ ├── foo.ps │ ├── foo.ttf │ ├── foo.txt │ ├── foo.webm │ ├── foo.xml │ └── foo.zip ├── requirements.txt ├── test_main.py ├── test_mimegroups.py └── test_utils.py ├── tox.ini └── xtractmime ├── __init__.py ├── _patterns.py ├── _utils.py └── mimegroups.py /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.2.1 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:xtractmime/__init__.py] 7 | 8 | [bumpversion:file:setup.py] 9 | -------------------------------------------------------------------------------- /.github/workflows/checks.yml: -------------------------------------------------------------------------------- 1 | name: Checks 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | checks: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | include: 12 | - python-version: 3 13 | env: 14 | TOXENV: black 15 | - python-version: 3 16 | env: 17 | TOXENV: bandit 18 | - python-version: 3 19 | env: 20 | TOXENV: flake8 21 | - python-version: 3.8 22 | env: 23 | TOXENV: typing 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v2 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | 33 | - name: Run check 34 | env: ${{ matrix.env }} 35 | run: | 36 | pip install -U pip 37 | pip install -U tox 38 | tox 39 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | on: 
3 | release: 4 | types: [published] 5 | 6 | jobs: 7 | publish: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v2 12 | 13 | - name: Set up Python 3 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: 3 17 | 18 | - name: Publish to PyPI 19 | run: | 20 | pip install --upgrade pip 21 | pip install --upgrade setuptools wheel twine 22 | python setup.py sdist bdist_wheel 23 | export TWINE_USERNAME=__token__ 24 | export TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }} 25 | twine upload dist/* 26 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | tests-ubuntu: 7 | name: "Test: py${{ matrix.python-version }}, Ubuntu" 8 | runs-on: ubuntu-latest 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "pypy3.7"] 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | 22 | - name: Install tox 23 | run: pip install tox 24 | 25 | - name: Run tests 26 | run: tox -e py 27 | 28 | - name: Upload coverage report 29 | run: bash <(curl -s https://codecov.io/bash) 30 | 31 | tests-other-os: 32 | name: "Test: py3.8, ${{ matrix.os }}" 33 | runs-on: "${{ matrix.os }}" 34 | strategy: 35 | fail-fast: false 36 | matrix: 37 | os: [macos-latest, windows-latest] 38 | 39 | steps: 40 | - uses: actions/checkout@v3 41 | 42 | - name: Set up Python 3.8 43 | uses: actions/setup-python@v4 44 | with: 45 | python-version: 3.8 46 | 47 | - name: Install tox 48 | run: pip install tox 49 | 50 | - name: Run tests 51 | run: tox -e py 52 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.pyc 2 | .~lock* 3 | .DS_Store 4 | .mypy_cache/ 5 | *.egg-info/ 6 | .tox/ 7 | .coverage 8 | htmlcov/ 9 | coverage.xml 10 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.2.1 (2024-01-16) 4 | 5 | A specified content type is now ignored if it is not a valid MIME type. 6 | 7 | ## 0.2.0 (2023-08-31) 8 | 9 | Dropped Python 3.6 support, added official Python 3.10, 3.11 and PyPy support. 10 | 11 | A specified content type is no longer ignored for being a variant of 12 | `text/plain`, unless it is one of the 4 specific variants affected by the old 13 | Apache bug [13986](https://bz.apache.org/bugzilla/show_bug.cgi?id=13986). 14 | 15 | ## 0.1.0 (2022-06-21) 16 | 17 | Initial release. 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2021 Akshay Sharma 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xtractmime 2 | 3 | `xtractmime` is a [BSD-licensed](https://opensource.org/licenses/BSD-3-Clause) 4 | Python 3.7+ implementation of the [MIME Sniffing 5 | Standard](https://mimesniff.spec.whatwg.org/). 
6 | 7 | Install from [`PyPI`](https://pypi.python.org/pypi/xtractmime): 8 | 9 | ``` 10 | pip install xtractmime 11 | ``` 12 | 13 | --- 14 | 15 | ## Basic usage 16 | 17 | Below are some simple examples of using `xtractmime.extract_mime`: 18 | 19 | ```python 20 | >>> from xtractmime import extract_mime 21 | >>> extract_mime(b'Sample text content') 22 | b'text/plain' 23 | >>> extract_mime(b'', content_types=(b'text/html',)) 24 | b'text/html' 25 | ``` 26 | 27 | Additional functionality to check if a MIME type belongs to a specific MIME type group using 28 | methods included in `xtractmime.mimegroups`: 29 | 30 | ```python 31 | >>> from xtractmime.mimegroups import is_html_mime_type, is_image_mime_type 32 | >>> mime_type = b'text/html' 33 | >>> is_html_mime_type(mime_type) 34 | True 35 | >>> is_image_mime_type(mime_type) 36 | False 37 | ``` 38 | 39 | --- 40 | 41 | ## API Reference 42 | 43 | ### function `xtractmime.extract_mime(*args, **kwargs) -> Optional[bytes]` 44 | **Parameters:** 45 | 46 | * `body: bytes` 47 | * `content_types: Optional[Tuple[bytes]] = None` 48 | * `http_origin: bool = True` 49 | * `no_sniff: bool = False` 50 | * `extra_types: Optional[Tuple[Tuple[bytes, bytes, Optional[Set[bytes]], bytes], ...]] = None` 51 | * `supported_types: Set[bytes] = None` 52 | 53 | Return the [MIME type essence](https://mimesniff.spec.whatwg.org/#mime-type-essence) (e.g. `text/html`) matching the input data, or 54 | `None` if no match can be found. 55 | 56 | The `body` parameter is the byte sequence whose MIME type is to be determined. `xtractmime` only considers the first few 57 | bytes of the `body` and the specific number of bytes read is defined in the `xtractmime.RESOURCE_HEADER_BUFFER_LENGTH` constant. 58 | 59 | `content_types` is a tuple of MIME types given in the resource metadata. For example, for resources retrieved via HTTP, users should pass the list of MIME types mentioned in the `Content-Type` header. 
60 | 61 | `http_origin` indicates if the resource has been retrieved via HTTP (`True`, default) or not (`False`). 62 | 63 | `no_sniff` is a flag which is *`True`* if the user agent does not wish to 64 | perform sniffing on the resource and *`False`* (by default) otherwise. Users may want to set 65 | this parameter to *`True`* if the [`X-Content-Type-Options`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Content-Type-Options) response header is set to `nosniff`. For more info, see [here](https://mimesniff.spec.whatwg.org/#no-sniff-flag). 66 | 67 | `extra_types` is a tuple of patterns to support detecting additional MIME types. Each entry in the tuple should follow the format 68 | **(Byte Pattern, Pattern Mask, Leading Bytes, MIME type)**: 69 | 70 | * **Byte Pattern** is a byte sequence to compare with the first few bytes (``xtractmime.RESOURCE_HEADER_BUFFER_LENGTH``) of the `body`. 71 | * **Pattern Mask** is a byte sequence that indicates the significance of **Byte Pattern** bytes: `b"\xff"` indicates the matching byte is strictly significant, `b"\xdf"` indicates that the byte is significant in an ASCII case-insensitive way, and `b"\x00"` indicates that the byte is not significant. 72 | * **Leading Bytes** is a set of bytes to be ignored while matching the leading bytes in the content. 73 | * **MIME type** should be returned if the pattern matches. 74 | 75 | **Sample `extra_types`:** 76 | ```python 77 | extra_types = ((b'test', b'\xff\xff\xff\xff', None, b'text/test'), ...) 78 | ``` 79 | 80 | --- 81 | **NOTE** 82 | 83 | *Be careful while using the `extra_types` argument, as it may introduce some privilege escalation vulnerabilities for `xtractmime`. For more info, see [here](https://mimesniff.spec.whatwg.org/#ref-for-mime-type%E2%91%A1%E2%91%A8).* 84 | 85 | --- 86 | 87 | Optional `supported_types` is a set of all [MIME types supported by the user agent](https://mimesniff.spec.whatwg.org/#supported-by-the-user-agent). 
If `supported_types` is not 88 | specified, all MIME types are assumed to be supported. Using this parameter can improve the performance of `xtractmime`. 89 | 90 | ### function `xtractmime.is_binary_data(input_bytes: bytes) -> bool` 91 | 92 | Return *`True`* if the provided byte sequence contains any binary data bytes, else *`False`* 93 | 94 | ### MIME type group functions 95 | 96 | The following functions return `True` if a given MIME type belongs to a certain 97 | [MIME type group](https://mimesniff.spec.whatwg.org/#mime-type-groups), or 98 | `False` otherwise: 99 | ``` 100 | xtractmime.mimegroups.is_archive_mime_type(mime_type: bytes) -> bool 101 | xtractmime.mimegroups.is_audio_video_mime_type(mime_type: bytes) -> bool 102 | xtractmime.mimegroups.is_font_mime_type(mime_type: bytes) -> bool 103 | xtractmime.mimegroups.is_html_mime_type(mime_type: bytes) -> bool 104 | xtractmime.mimegroups.is_image_mime_type(mime_type: bytes) -> bool 105 | xtractmime.mimegroups.is_javascript_mime_type(mime_type: bytes) -> bool 106 | xtractmime.mimegroups.is_json_mime_type(mime_type: bytes) -> bool 107 | xtractmime.mimegroups.is_scriptable_mime_type(mime_type: bytes) -> bool 108 | xtractmime.mimegroups.is_xml_mime_type(mime_type: bytes) -> bool 109 | xtractmime.mimegroups.is_zip_mime_type(mime_type: bytes) -> bool 110 | ``` 111 | **Example** 112 | ```python 113 | >>> from xtractmime.mimegroups import is_html_mime_type, is_image_mime_type, is_zip_mime_type 114 | >>> mime_type = b'text/html' 115 | >>> is_html_mime_type(mime_type) 116 | True 117 | >>> is_image_mime_type(mime_type) 118 | False 119 | >>> is_zip_mime_type(mime_type) 120 | False 121 | ``` 122 | 123 | 124 | ## Changelog 125 | 126 | See the [changelog](CHANGELOG.md) 127 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 99 3 | 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, W503 3 | max-line-length = 99 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | 4 | with open("README.md", "r", encoding="utf-8") as desc: 5 | long_description = desc.read() 6 | 7 | setuptools.setup( 8 | name="xtractmime", 9 | version="0.2.1", 10 | license="BSD", 11 | description=( 12 | "Implementation of the MIME Sniffing standard (https://mimesniff.spec.whatwg.org/)" 13 | ), 14 | long_description=long_description, 15 | long_description_content_type="text/markdown", 16 | author="Akshay Sharma", 17 | author_email="akshaysharmajs@gmail.com", 18 | url="https://github.com/scrapy/xtractmime", 19 | packages=["xtractmime"], 20 | python_requires=">=3.7", 21 | classifiers=[ 22 | "Development Status :: 1 - Planning", 23 | "License :: OSI Approved :: BSD License", 24 | "Programming Language :: Python", 25 | "Programming Language :: Python :: 3.7", 26 | "Programming Language :: Python :: 3.8", 27 | "Programming Language :: Python :: 3.9", 28 | "Programming Language :: Python :: 3.10", 29 | "Programming Language :: Python :: 3.11", 30 | "Programming Language :: Python :: Implementation :: CPython", 31 | "Programming Language :: Python :: Implementation :: PyPy", 32 | "Framework :: Scrapy", 33 | "Intended Audience :: Developers", 34 | "Topic :: Internet :: WWW/HTTP", 35 | "Topic :: Software Development :: Libraries :: Application Frameworks", 36 | "Topic :: Software Development :: Libraries :: Python Modules", 37 | ], 38 | ) 39 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/__init__.py -------------------------------------------------------------------------------- /tests/files/NonID3.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/files/NonID3.mp3 -------------------------------------------------------------------------------- /tests/files/foo.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/files/foo.exe -------------------------------------------------------------------------------- /tests/files/foo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/files/foo.gif -------------------------------------------------------------------------------- /tests/files/foo.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | A Sample HTML Document (Test File) 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |

A Sample HTML Document (Test File)

13 |

A blank HTML document for testing purposes.

14 |

Go back to the demo

15 |

Read the HTML5 download attribute guide

16 | 17 | 18 | -------------------------------------------------------------------------------- /tests/files/foo.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/files/foo.mp3 -------------------------------------------------------------------------------- /tests/files/foo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/files/foo.mp4 -------------------------------------------------------------------------------- /tests/files/foo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/files/foo.pdf -------------------------------------------------------------------------------- /tests/files/foo.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/files/foo.ttf -------------------------------------------------------------------------------- /tests/files/foo.txt: -------------------------------------------------------------------------------- 1 | Quod equidem non reprehendo; 2 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quibus natura iure responderit non esse verum aliunde finem beate vivendi, a se principia rei gerendae peti; Quae enim adhuc protulisti, popularia sunt, ego autem a te elegantiora desidero. Duo Reges: constructio interrete. Tum Lucius: Mihi vero ista valde probata sunt, quod item fratri puto. Bestiarum vero nullum iudicium puto. Nihil enim iam habes, quod ad corpus referas; Deinde prima illa, quae in congressu solemus: Quid tu, inquit, huc? 
Et homini, qui ceteris animantibus plurimum praestat, praecipue a natura nihil datum esse dicemus? 3 | 4 | Iam id ipsum absurdum, maximum malum neglegi. Quod ea non occurrentia fingunt, vincunt Aristonem; Atqui perspicuum est hominem e corpore animoque constare, cum primae sint animi partes, secundae corporis. Fieri, inquam, Triari, nullo pacto potest, ut non dicas, quid non probes eius, a quo dissentias. Equidem e Cn. An dubium est, quin virtus ita maximam partem optineat in rebus humanis, ut reliquas obruat? 5 | 6 | Quis istum dolorem timet? 7 | Summus dolor plures dies manere non potest? Dicet pro me ipsa virtus nec dubitabit isti vestro beato M. Tubulum fuisse, qua illum, cuius is condemnatus est rogatione, P. Quod si ita sit, cur opera philosophiae sit danda nescio. 8 | 9 | Ex eorum enim scriptis et institutis cum omnis doctrina liberalis, omnis historia. 10 | Quod si ita est, sequitur id ipsum, quod te velle video, omnes semper beatos esse sapientes. Cum enim fertur quasi torrens oratio, quamvis multa cuiusque modi rapiat, nihil tamen teneas, nihil apprehendas, nusquam orationem rapidam coerceas. Ita redarguitur ipse a sese, convincunturque scripta eius probitate ipsius ac moribus. At quanta conantur! Mundum hunc omnem oppidum esse nostrum! Incendi igitur eos, qui audiunt, vides. Vide, ne magis, inquam, tuum fuerit, cum re idem tibi, quod mihi, videretur, non nova te rebus nomina inponere. Qui-vere falsone, quaerere mittimus-dicitur oculis se privasse; Si ista mala sunt, in quae potest incidere sapiens, sapientem esse non esse ad beate vivendum satis. At vero si ad vitem sensus accesserit, ut appetitum quendam habeat et per se ipsa moveatur, quid facturam putas? 11 | 12 | Quem si tenueris, non modo meum Ciceronem, sed etiam me ipsum abducas licebit. 13 | Stulti autem malorum memoria torquentur, sapientes bona praeterita grata recordatione renovata delectant. 14 | Esse enim quam vellet iniquus iustus poterat inpune. 
15 | Quae autem natura suae primae institutionis oblita est? 16 | Verum tamen cum de rebus grandioribus dicas, ipsae res verba rapiunt; 17 | Hoc est non modo cor non habere, sed ne palatum quidem. 18 | Voluptatem cum summum bonum diceret, primum in eo ipso parum vidit, deinde hoc quoque alienum; Sed tu istuc dixti bene Latine, parum plane. Nam haec ipsa mihi erunt in promptu, quae modo audivi, nec ante aggrediar, quam te ab istis, quos dicis, instructum videro. Fatebuntur Stoici haec omnia dicta esse praeclare, neque eam causam Zenoni desciscendi fuisse. Non autem hoc: igitur ne illud quidem. Ratio quidem vestra sic cogit. Cum audissem Antiochum, Brute, ut solebam, cum M. An quod ita callida est, ut optime possit architectari voluptates? 19 | 20 | Idemne, quod iucunde? 21 | Haec mihi videtur delicatior, ut ita dicam, molliorque ratio, quam virtutis vis gravitasque postulat. Sed quoniam et advesperascit et mihi ad villam revertendum est, nunc quidem hactenus; Cuius ad naturam apta ratio vera illa et summa lex a philosophis dicitur. Neque solum ea communia, verum etiam paria esse dixerunt. Sed nunc, quod agimus; A mene tu? 
-------------------------------------------------------------------------------- /tests/files/foo.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/files/foo.webm -------------------------------------------------------------------------------- /tests/files/foo.xml: -------------------------------------------------------------------------------- 1 |  2 | 3 | <0> 4 | 4051 5 | manoj 6 | manoj@gmail.com 7 | Test@123 8 | 9 | 7f471974-ae46-4ac0-a882-1980c300c4d6 10 | 11 | 12 | 0 13 | 0 14 | 15 | 0 16 | 1 17 | 1 18 | Images/9b291404-bc2e-4806-88c5-08d29e65a5ad.png 19 | Images/44af97d9-b8c9-4ec1-a099-010671db25b7.png 20 | false 21 | false 22 | false 23 | false 24 | 2020-01-01T11:13:27.1107739 25 | 2020-01-02T09:16:49.284864 26 | 77.389849 27 | 28.6282231 28 | Unnamed Road, Chhijarsi, Sector 63, Noida, Uttar Pradesh 201307, India 29 | 127 30 | 0 31 | 32 | <1> 33 | 4050 34 | pankaj 35 | p1@gmail.com 36 | Test@123 37 | 38 | e269eeef-1de1-4438-885a-e30a9ad26106 39 | 40 | 41 | 0 42 | 0 43 | 44 | 0 45 | 1 46 | 1 47 | 48 | 49 | false 50 | false 51 | false 52 | false 53 | 2020-01-01T07:39:34.1618239 54 | 2020-01-01T07:39:34.161824 55 | 0 56 | 0 57 | 58 | 0 59 | 0 60 | 61 | <2> 62 | 3050 63 | Neeraj1993 64 | neeraj.singh@adequateinfosoft.com 65 | 286956 66 | 67 | 562c2fb5-6799-4b51-8733-a60564c96adc 68 | 69 | 70 | 0 71 | 0 72 | 73 | 0 74 | 1 75 | 1 76 | 77 | 78 | false 79 | false 80 | false 81 | false 82 | 2019-12-27T10:16:05.6578091 83 | 2019-12-27T10:22:30.8416992 84 | 77.389849 85 | 28.6282231 86 | Unnamed Road, Chhijarsi, Sector 63, Noida, Uttar Pradesh 201307, India 87 | 0 88 | 0 89 | 90 | <3> 91 | 3049 92 | Sophia 93 | sophia@gmail.com 94 | Test@123 95 | Yo 96 | f3bc9393-ad13-41a2-a69b-b607a42d829f 97 | 98 | 18302 Lorance Trail, Little Rock, AR 72206, USA 99 | 0 100 | 0 101 | 2019-12-19T11:54:19 102 | 2 103 | 1 104 | 1 105 | 106 | 107 | false 
108 | false 109 | false 110 | false 111 | 2019-12-26T07:36:22.3481221 112 | 2019-12-26T07:36:22.3481222 113 | 77.3674236 114 | 28.6260665 115 | 35, Block A, Industrial Area, Sector 62, Noida, Uttar Pradesh 201309, India 116 | 36 117 | 0 118 | 119 | <4> 120 | 3048 121 | Raju Prasad 122 | raju.nsit@gmail.com 123 | Raju@1234 124 | Don't Quit Your Day Dream 125 | b3eda104-0771-4804-8be2-0e6d7c16412d 126 | 127 | Karbala Rd, Block G, Sector 5, Dakshinpuri, New Delhi, Delhi 110044, India 128 | 0 129 | 0 130 | 2019-12-01T01:55:34 131 | 1 132 | 1 133 | 1 134 | Images/291d0a5a-c8c2-45ac-b79f-cc04e0c6f31e.png 135 | Images/7aa45b5d-3adf-4da0-a0fe-694f069e9049.png 136 | true 137 | true 138 | true 139 | true 140 | 2019-12-26T07:17:08.3460039 141 | 2019-12-26T14:27:00.4327446 142 | 77.3674236 143 | 28.6260665 144 | 35, Block A, Industrial Area, Sector 62, Noida, Uttar Pradesh 201309, India 145 | 5 146 | 0 147 | 148 | <5> 149 | 3047 150 | Ankiish Thapliyal 151 | thapliyalankiish958@gmail.com 152 | Test@1234 153 | I failed many opportunities may times but I am the one who tries again and again every time with a smile. 154 | caf070df-d2c2-4e99-ba06-9482ef5eb7ba 155 | 156 | Dehradun 157 | 0 158 | 0 159 | 2019-12-31T03:02:48 160 | 1 161 | 1 162 | 1 163 | Images/0da0ca00-cfaf-41e8-ab39-cbff55ec64b3.png 164 | Images/088fd182-2a2d-4a72-946d-a90d047e4557.png 165 | true 166 | true 167 | true 168 | true 169 | 2019-12-26T07:13:01.9262534 170 | 2019-12-30T11:11:00.4082038 171 | 77.3790036 172 | 28.630186199999997 173 | Suite No. 203 H - 15 Rise tower, Sector 63, H Block, Sector 62, Noida, Uttar Pradesh 201301, India 174 | 167 175 | 0 176 | 177 | <6> 178 | 3046 179 | Aryan Thapliyal 180 | ashithewarrior@gmail.com 181 | Test@1234 182 | Testing is required for an effective performance of software application or product. 
It's important to ensure that the application should not result into any failures because it can be very expensive in the future or in the later stages of the development 183 | c8cb5636-cd01-4133-adac-360cecc710af 184 | 185 | Dehradun 186 | 0 187 | 0 188 | 2019-12-01T05:20:43 189 | 1 190 | 1 191 | 1 192 | Images/8a8a93f3-5696-4387-84e5-ac632cc85ec1.png 193 | Images/cdaf5fa5-e786-49a6-a1d5-453b532f3d8d.png 194 | false 195 | false 196 | false 197 | false 198 | 2019-12-26T07:10:53.2318238 199 | 2019-12-30T05:32:04.0700419 200 | 77.37901219999999 201 | 28.630237299999997 202 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 203 | 160 204 | 0 205 | 206 | <7> 207 | 3045 208 | shivam 209 | shivam@gmail.com 210 | Shivam@123 211 | 212 | 59b6f0af-fc25-4a6b-b865-5fb29023c6df 213 | 214 | 215 | 0 216 | 0 217 | 218 | 0 219 | 1 220 | 1 221 | Images/da45be05-a363-4b27-bdf7-1a8c9a912ae1.png 222 | Images/4885d2c3-3c86-4ed7-b28b-e1900d618724.png 223 | false 224 | false 225 | false 226 | false 227 | 2019-12-26T07:01:24.5014557 228 | 2019-12-26T07:02:16.9663295 229 | 77.3674236 230 | 28.6260665 231 | 35, Block A, Industrial Area, Sector 62, Noida, Uttar Pradesh 201309, India 232 | 11 233 | 0 234 | 235 | <8> 236 | 3044 237 | Navya Upadhyay 238 | navya.adequate@gmail.com 239 | navya2123 240 | 241 | 0362f1b5-80be-4dff-bd40-c236c79561eb 242 | 243 | 244 | 0 245 | 0 246 | 247 | 0 248 | 1 249 | 1 250 | 251 | 252 | false 253 | false 254 | false 255 | false 256 | 2019-12-26T06:53:22.1076016 257 | 2019-12-26T06:53:22.1076017 258 | 0 259 | 0 260 | 261 | 5 262 | 0 263 | 264 | <9> 265 | 3043 266 | Ritu singh 267 | ritusinghadequate@gmail.com 268 | ritusingh1234 269 | 270 | a268517f-960a-418a-9c89-48708372702d 271 | 272 | 273 | 0 274 | 0 275 | 276 | 0 277 | 1 278 | 1 279 | 280 | 281 | false 282 | false 283 | false 284 | false 285 | 2019-12-26T06:53:10.2343196 286 | 2019-12-26T06:53:10.2343197 287 | 0 288 | 0 289 | 290 | 5 291 | 0 292 | 293 | <10> 294 | 3042 295 | faiz 296 | 
faizadequate@gmail.com 297 | Test@1234 298 | I am who I am. Your approval is not needed. 299 | 33dd5829-0a01-4b3b-80d3-1a63a75b3436 300 | 301 | New Delhi 302 | 0 303 | 0 304 | 2019-12-12T12:10:02 305 | 1 306 | 1 307 | 1 308 | Images/80cdd447-4a3a-483f-b222-86989cc3d37e.png 309 | Images/1717f4f1-47e1-4f5f-9129-72d7933aa15a.png 310 | false 311 | false 312 | false 313 | false 314 | 2019-12-26T06:30:58.6524884 315 | 2019-12-30T06:57:19.58494 316 | 77.379012 317 | 28.630232 318 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 319 | 38 320 | 0 321 | 322 | <11> 323 | 3041 324 | Martin Wilson 325 | ravencomputer667@gmail.com 326 | martin@123 327 | Travelers, 328 | 18ceb759-241c-43d3-a9eb-5344ea2ef1d9 329 | 330 | Moorgate Station (Stop N), London EC2Y 5EJ, UK 331 | 0 332 | 0 333 | 2019-12-11T12:29:13 334 | 1 335 | 1 336 | 1 337 | Images/023d1a36-ef5e-4248-a9a1-900069fe3f28.png 338 | Images/0e773003-964f-42c8-b591-b33fa0563a85.png 339 | true 340 | true 341 | false 342 | false 343 | 2019-12-26T06:22:36.1180762 344 | 2019-12-26T06:29:14.6591852 345 | 0 346 | 0 347 | 348 | 18 349 | 0 350 | 351 | <12> 352 | 3040 353 | Shweta Singh 354 | amelia.claire.hi@gmail.com 355 | Test@1234 356 | What others think of me is none of my business.I’m the most awesome person I know. 357 | e32b4e12-18f3-4fd0-9a3d-2021d8381dff 358 | 359 | India - Pakistan Border Rd, Bakhasar, Rajasthan 344706, India 360 | 0 361 | 0 362 | 2019-12-02T11:47:56 363 | 2 364 | 1 365 | 1 366 | Images/085985b6-0662-4e1d-b5b4-20016d31514f.png 367 | Images/7efb014a-f551-4702-b5af-2fe11cca2a88.png 368 | false 369 | false 370 | false 371 | false 372 | 2019-12-26T06:20:51.0353245 373 | 2019-12-30T06:24:31.9803752 374 | 77.3790045 375 | 28.630205699999998 376 | Suite No. 
203 H - 15 Rise tower, Sector 63, H Block, Sector 62, Noida, Uttar Pradesh 201301, India 377 | 77 378 | 0 379 | 380 | <13> 381 | 3039 382 | jagjit 383 | jagjit.singh.adequate.36@gmail.com 384 | Test@1234 385 | Take a chance and live life to the fullest. When we are at our most daring is when we feel the most fulfillment. 386 | 1e150816-7075-4d6d-b3d6-eb1c0e415e61 387 | 388 | Delhi - Meerut Expy, Nai Basti Dundahera, Ghaziabad, Uttar Pradesh 201009, India 389 | 0 390 | 0 391 | 2012-01-03T12:00:00 392 | 1 393 | 1 394 | 1 395 | Images/fe49d058-de88-4805-8e2d-0c002b090d15.png 396 | Images/a3dc98f7-0754-4616-bbc0-0395e19bb899.png 397 | false 398 | false 399 | false 400 | false 401 | 2019-12-26T06:20:32.873132 402 | 2020-01-02T06:29:36.6383214 403 | 77.3790121 404 | 28.630189800000004 405 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 406 | 32 407 | 0 408 | 409 | <14> 410 | 3038 411 | Ashi 123 412 | ashi123@gmail.com 413 | Test@1234 414 | 415 | 61a43ad3-02b3-4431-8c1b-1d29933147e1 416 | 417 | 418 | 0 419 | 0 420 | 421 | 0 422 | 1 423 | 1 424 | 425 | 426 | false 427 | false 428 | false 429 | false 430 | 2019-12-26T05:30:27.4225101 431 | 2019-12-26T05:30:27.4225102 432 | 0 433 | 0 434 | 435 | 0 436 | 0 437 | 438 | <15> 439 | 3037 440 | shivamvermaa4 441 | shivam.vemaa4@gmail.com 442 | Shivam@123 443 | 444 | 97534ce9-5386-4b92-9741-fc1860bbcd1a 445 | 446 | 447 | 0 448 | 0 449 | 450 | 0 451 | 1 452 | 1 453 | 454 | 455 | false 456 | false 457 | false 458 | false 459 | 2019-12-24T11:58:30.3613651 460 | 2019-12-24T11:58:30.3613651 461 | 77.389849 462 | 28.6282231 463 | Unnamed Road, Chhijarsi, Sector 63, Noida, Uttar Pradesh 201307, India 464 | 10 465 | 0 466 | 467 | <16> 468 | 3036 469 | Sophia 470 | n2@gmail.com 471 | Test@123 472 | WE 473 | 1ebfb0ac-7d98-43fb-ba41-0e40091c6071 474 | 475 | Saint-Louis-Strasse 3, 4056 Basel, Switzerland 476 | 0 477 | 0 478 | 2019-12-18T03:18:47 479 | 1 480 | 1 481 | 1 482 | Images/f3f7694a-0de4-4598-b141-39ad550d1912.png 483 | 
Images/ae9868ef-f87c-4cc2-8429-5cb46dba6cbd.png 484 | false 485 | false 486 | false 487 | false 488 | 2019-12-24T11:36:36.1739424 489 | 2019-12-26T09:48:57.2586835 490 | 77.389849 491 | 28.6282231 492 | Unnamed Road, Chhijarsi, Sector 63, Noida, Uttar Pradesh 201307, India 493 | 45 494 | 0 495 | 496 | <17> 497 | 3035 498 | n1 499 | n1@gmail.com 500 | Test@123 501 | An ordinary Bharitya 502 | c40422ef-27ac-4e40-adf5-87403143dc99 503 | 504 | 451 DERA THAKRAN HARIDWAR ROAD MAHANT BALBIR SINGH MARKET, Haridwar Road, Haridwar Rd, Chauda Bigha, Rishikesh, Uttarakhand 249201, India 505 | 0 506 | 0 507 | 2019-12-17T12:33:26 508 | 1 509 | 1 510 | 1 511 | Images/4e60754a-e8a7-4db7-b266-f28b2bcf00ae.png 512 | Images/9ca02453-ecfe-4852-9124-aa0595a9e692.png 513 | false 514 | false 515 | false 516 | false 517 | 2019-12-24T11:36:03.4815197 518 | 2020-01-02T07:20:34.2742376 519 | 77.389849 520 | 28.6282231 521 | Unnamed Road, Chhijarsi, Sector 63, Noida, Uttar Pradesh 201307, India 522 | 187 523 | 0 524 | 525 | <18> 526 | 3034 527 | neha 528 | neha@gmail.com 529 | Test@123 530 | 531 | 6aaaa999-8e14-40ca-b043-087624c48fd1 532 | 533 | 534 | 0 535 | 0 536 | 537 | 0 538 | 1 539 | 1 540 | 541 | 542 | false 543 | false 544 | false 545 | false 546 | 2019-12-24T10:35:00.1292083 547 | 2019-12-24T10:35:00.1292083 548 | 77.3674236 549 | 28.6260665 550 | 35, Block A, Industrial Area, Sector 62, Noida, Uttar Pradesh 201309, India 551 | 53 552 | 0 553 | 554 | <19> 555 | 3033 556 | sanjay 557 | sanjay@gmail.com 558 | Test@123 559 | 560 | c9989beb-685a-41d7-b6de-77dd208979df 561 | 562 | 563 | 0 564 | 0 565 | 566 | 0 567 | 1 568 | 1 569 | 570 | 571 | false 572 | false 573 | false 574 | false 575 | 2019-12-24T08:16:23.8466413 576 | 2019-12-24T08:16:23.8466414 577 | 77.3674236 578 | 28.6260665 579 | 35, Block A, Industrial Area, Sector 62, Noida, Uttar Pradesh 201309, India 580 | 81 581 | 0 582 | 583 | <20> 584 | 3032 585 | Akku2 586 | akku2@gmail.com 587 | zxcvbnm1 588 | 589 | 
12ac2112-c7b3-47d3-8d48-e839a1ce3955 590 | 591 | 592 | 0 593 | 0 594 | 595 | 0 596 | 1 597 | 1 598 | 599 | 600 | false 601 | false 602 | false 603 | false 604 | 2019-12-24T07:30:32.9445148 605 | 2019-12-24T07:30:32.9445148 606 | 77.3790472 607 | 28.6302199 608 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 609 | 0 610 | 0 611 | 612 | <21> 613 | 3031 614 | Ashish Thapliyal 615 | ashi1@gmail.com 616 | Test@1234 617 | I Am Who I Am, Your Approval Is Not Needed. I Was Reminded That My Blood Type Is Be Positive 618 | 8889db9b-01f6-4305-b07d-aecc516037bb 619 | 620 | NH34, Shatabdi Nagar, Rithani, Meerut, Uttar Pradesh 250103, India 621 | 0 622 | 0 623 | 1996-10-22T12:00:00 624 | 1 625 | 1 626 | 1 627 | Images/fc8af47d-ef15-4c25-9d3f-a7a58c7444f9.png 628 | Images/92d0e3d1-c075-4dd8-8e8c-196f30e1e689.png 629 | true 630 | true 631 | true 632 | true 633 | 2019-12-24T06:52:08.6757278 634 | 2020-01-02T06:30:51.6595305 635 | 77.3790169 636 | 28.6301942 637 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 638 | 206 639 | 0 640 | 641 | <22> 642 | 3030 643 | singh 644 | singh@gmail.com 645 | Test@123 646 | jdfgjghjgf 647 | 280a1f00-8597-4eee-8d46-9c8d5a120ac7 648 | 649 | Jg Halli Rd, Karnataka 577533, India 650 | 0 651 | 0 652 | 2019-12-04T03:25:02 653 | 1 654 | 1 655 | 1 656 | Images/2f6c09a2-d758-45af-8724-e69a6b2eeed4.png 657 | Images/4c997464-7384-4f26-bcab-ba6d0f1474b9.png 658 | false 659 | false 660 | false 661 | false 662 | 2019-12-24T06:32:23.0727945 663 | 2019-12-24T09:39:40.1944746 664 | 77.38982399999999 665 | 28.6285824 666 | noidya sector 63 Vidya polymer, Chhijarsi, Sector 63, Noida, Uttar Pradesh 201307, India 667 | 30 668 | 0 669 | 670 | <23> 671 | 3029 672 | Akku Testing 673 | akku1@gmail.com 674 | zxcvbnm1 675 | For some reason you've got "bootstrap-sass": "^3.3.7" so it looks like you are mixing Bootstrap versions and might be using Bootstrap 3, effectively. 
676 | 7b0400eb-cd4f-45df-a8db-0c0871d46906 677 | 678 | Noida-Greater Noida Expressway, Amit Nagar, Sadarpur, Greater Noida, Uttar Pradesh, India 679 | 0 680 | 0 681 | 2019-12-18T06:26:55 682 | 2 683 | 1 684 | 1 685 | Images/eba5299c-5405-45cc-ac33-ef725d3e7bec.png 686 | Images/b1d0cff4-3f1f-4571-a638-1e1094eb86bb.png 687 | true 688 | true 689 | true 690 | false 691 | 2019-12-24T06:31:54.9038705 692 | 2019-12-24T13:00:12.9520741 693 | 77.3790246 694 | 28.6302159 695 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 696 | 161 697 | 0 698 | 699 | <24> 700 | 3028 701 | Aakankshi Gupta 702 | aakankshi.cuminte@gmail.com 703 | 116331365713184193775 704 | 705 | c916c01f-5844-445b-9f5a-e60d77ecb620 706 | 707 | 708 | 0 709 | 0 710 | 711 | 0 712 | 1 713 | 1 714 | Images/9d38d28e-4021-4063-a703-821d1878a18c.png 715 | 716 | false 717 | false 718 | false 719 | false 720 | 2019-12-16T06:56:43.320768 721 | 2019-12-16T06:56:43.3207681 722 | 77.37902439999999 723 | 28.630209599999997 724 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 725 | 0 726 | 0 727 | 728 | <25> 729 | 3026 730 | Manresh Chandra 5 731 | manresh5@gmail.com 732 | zxcvbnm1 733 | about.me is a personal web hosting service co-founded by Ryan Freitas, Tony Conrad and Tim Young in October 2009.a 734 | 3c8ad706-9667-477a-adb6-3539f7b10a67 735 | 736 | Deák Ferenc tér 737 | 0 738 | 0 739 | 2019-12-11T11:24:07 740 | 3 741 | 1 742 | 1 743 | 744 | 745 | true 746 | true 747 | true 748 | false 749 | 2019-12-16T05:13:28.5717641 750 | 2019-12-16T05:13:28.5717641 751 | 77.3790338 752 | 28.630177699999997 753 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 754 | 0 755 | 0 756 | 757 | <26> 758 | 3025 759 | Manresh Chandra 4 760 | manresh4@gmail.com 761 | zxcvbnm1 762 | 763 | 8131e59d-7c83-4024-9423-0639a5887b52 764 | 765 | 766 | 0 767 | 0 768 | 769 | 0 770 | 1 771 | 1 772 | 773 | 774 | false 775 | false 776 | false 777 | false 778 | 2019-12-16T05:11:34.473505 779 | 2019-12-16T05:11:34.473505 
780 | 0 781 | 0 782 | 783 | 0 784 | 0 785 | 786 | <27> 787 | 3024 788 | Manresh Chandra 3 789 | manresh3@gmail.com 790 | zxcvbnm1 791 | 792 | dc752b59-3d36-4b07-bc5c-6cd5913bae83 793 | 794 | 795 | 0 796 | 0 797 | 798 | 0 799 | 1 800 | 1 801 | 802 | 803 | false 804 | false 805 | false 806 | false 807 | 2019-12-16T05:03:39.4564421 808 | 2019-12-16T05:03:39.4564423 809 | 0 810 | 0 811 | 812 | 3 813 | 0 814 | 815 | <28> 816 | 3023 817 | Manresh Chandra 2 818 | manresh2@gmail.com 819 | zxcvbnm1 820 | 821 | 867f49f2-8af8-45bf-9bd5-28ec641d3c44 822 | 823 | 824 | 0 825 | 0 826 | 827 | 0 828 | 1 829 | 1 830 | 831 | 832 | false 833 | false 834 | false 835 | false 836 | 2019-12-16T05:01:14.0246654 837 | 2019-12-16T05:01:14.0246654 838 | 0 839 | 0 840 | 841 | 0 842 | 0 843 | 844 | <29> 845 | 3022 846 | Raju Prasad 847 | raju.prasad@adequateinfosoft.com 848 | Raju@1234 849 | 850 | 58ecfd42-7f10-440e-8007-29c6fa524040 851 | 852 | 853 | 0 854 | 0 855 | 856 | 0 857 | 1 858 | 1 859 | 860 | 861 | false 862 | false 863 | false 864 | false 865 | 2019-12-13T14:36:34.2151655 866 | 2019-12-13T14:36:34.2151655 867 | 0 868 | 0 869 | 870 | 0 871 | 0 872 | 873 | <30> 874 | 3021 875 | Ashok Patel 876 | ashoktest@gmail.com 877 | ashokpatel457 878 | 879 | 520302ff-b08d-445f-be70-11c02d257e7e 880 | 881 | 882 | 0 883 | 0 884 | 885 | 0 886 | 1 887 | 1 888 | 889 | 890 | false 891 | false 892 | false 893 | false 894 | 2019-12-13T13:47:59.4288322 895 | 2019-12-13T13:47:59.4288322 896 | 0 897 | 0 898 | 899 | 3 900 | 0 901 | 902 | <31> 903 | 2021 904 | manresh1 905 | manresh1@gmail.com 906 | zxcvbnm1 907 | 908 | 9305e1c5-82e2-4a79-b0ca-37bb5880f505 909 | 910 | 911 | 0 912 | 0 913 | 914 | 0 915 | 1 916 | 1 917 | 918 | 919 | false 920 | false 921 | false 922 | false 923 | 2019-12-13T13:17:31.2904215 924 | 2019-12-13T13:17:31.2904216 925 | 0 926 | 0 927 | 928 | 2 929 | 0 930 | 931 | <32> 932 | 2020 933 | Manresh Chandra 934 | manresh@gmail.com 935 | zxcvbnm1 936 | My Name is Manresh Chandra. 
I'm self Motivated Person. I considers mine a ‘forever student,’ 937 | bc370f5f-7d13-4fe6-9c2c-848198394f8a 938 | 939 | 940 | 0 941 | 0 942 | 943 | 0 944 | 1 945 | 1 946 | 947 | 948 | false 949 | false 950 | false 951 | false 952 | 2019-12-13T07:08:47.9644619 953 | 2019-12-13T07:08:47.9644619 954 | 0 955 | 0 956 | 957 | 3 958 | 0 959 | 960 | <33> 961 | 2019 962 | Test 963 | aakankshi7@gmail.com 964 | zxcvbnm1 965 | test 966 | 5a1218d2-5d66-4255-9137-0f7e700bdcab 967 | 968 | Noida 969 | 0 970 | 0 971 | 2019-11-12T00:00:00 972 | 2 973 | 1 974 | 1 975 | 976 | Images/6f1437c7-7796-4d27-8b9a-041f3c5b86d4.png 977 | false 978 | false 979 | false 980 | false 981 | 2019-12-05T05:39:51.4928759 982 | 2019-12-05T06:41:24.107938 983 | 0 984 | 0 985 | 986 | 2 987 | 0 988 | 989 | <34> 990 | 2018 991 | aakankshi6 992 | aakankshi6@gmail.com 993 | zxcvbnm1 994 | A traveller without observation is a bird without wings . Life is a journey, not a destination. Never stop exploring 995 | 0ab8257f-7574-476d-b96f-508d3917af9b 996 | 997 | Noida 998 | 0 999 | 0 1000 | 2019-12-18T00:00:00 1001 | 3 1002 | 1 1003 | 1 1004 | Images/2e38c3c4-9e55-4b07-aedf-7e91934c167b.png 1005 | Images/938e342d-bd4e-4121-b222-9cb28f670d36.png 1006 | false 1007 | false 1008 | false 1009 | false 1010 | 2019-12-05T05:16:04.4212331 1011 | 2019-12-05T12:25:13.6221215 1012 | 0 1013 | 0 1014 | 1015 | 0 1016 | 0 1017 | 1018 | <35> 1019 | 2017 1020 | aakankshi5 1021 | aakankshi5@gmail.com 1022 | zxcvbnm1 1023 | 1024 | b6fcf970-3271-492b-866c-76238f126c3f 1025 | 1026 | 1027 | 0 1028 | 0 1029 | 1030 | 0 1031 | 1 1032 | 1 1033 | Images/86652baa-c60c-4ef8-8d65-0fda9bde5ae9.png 1034 | Images/51e3fb5e-a9a6-4b3d-8da0-7e882d7774db.png 1035 | false 1036 | false 1037 | false 1038 | false 1039 | 2019-12-05T05:06:49.1838089 1040 | 2019-12-05T05:22:00.9403881 1041 | 0 1042 | 0 1043 | 1044 | 1 1045 | 0 1046 | 1047 | <36> 1048 | 2016 1049 | aakankshi4 1050 | aakankshi4@gmail.com 1051 | zxcvbnm1 1052 | 1053 | 
1b43db89-29a4-4966-ae57-86d705978dd3 1054 | 1055 | 1056 | 0 1057 | 0 1058 | 1059 | 0 1060 | 1 1061 | 1 1062 | Images/3a369b02-20f5-46fc-b6d5-811c071d8afa.png 1063 | Images/7b23cd21-0474-473a-9b44-4bf97f67fba9.png 1064 | false 1065 | false 1066 | false 1067 | false 1068 | 2019-12-05T05:04:21.8383669 1069 | 2019-12-05T05:21:57.4139934 1070 | 0 1071 | 0 1072 | 1073 | 0 1074 | 0 1075 | 1076 | <37> 1077 | 2015 1078 | aakankshi3 1079 | aakankshi3@gmail.com 1080 | zxcvbnm1 1081 | 1082 | d65f85e3-0f8d-4c67-8de8-1546a7518cba 1083 | 1084 | 1085 | 0 1086 | 0 1087 | 1088 | 0 1089 | 1 1090 | 1 1091 | Images/a855f15e-f3b5-4873-8fdd-49172f8d5d51.png 1092 | Images/77db74bd-6df5-4dd3-abcc-f021e6486521.png 1093 | false 1094 | false 1095 | false 1096 | false 1097 | 2019-12-05T05:02:05.1655627 1098 | 2019-12-05T05:21:54.2379269 1099 | 0 1100 | 0 1101 | 1102 | 0 1103 | 0 1104 | 1105 | <38> 1106 | 2014 1107 | Aakankshi 2 1108 | aakankshi2@gmail.com 1109 | zxcvbnm1 1110 | ewewrewrew 1111 | 4e2923a9-b059-4dc9-8d92-ce0cffa84208 1112 | 1113 | Dada Dev Mandir Road 1114 | 0 1115 | 0 1116 | 2019-12-11T00:00:00 1117 | 0 1118 | 1 1119 | 1 1120 | 1121 | 1122 | true 1123 | true 1124 | true 1125 | false 1126 | 2019-12-05T05:00:19.5034062 1127 | 2019-12-05T05:00:19.5034063 1128 | 77.3790249 1129 | 28.6302141 1130 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 1131 | 5 1132 | 0 1133 | 1134 | <39> 1135 | 2013 1136 | Aakankshi 1 1137 | aakankshi1@gmail.com 1138 | zxcvbnm1 1139 | foundations in psychology and sociology and stassy 1140 | d7fa7194-5ac2-4e32-b11b-3792e5cc067c 1141 | 1142 | Delhi 1143 | 0 1144 | 0 1145 | 2019-12-26T06:12:30 1146 | 1 1147 | 1 1148 | 1 1149 | 1150 | 1151 | true 1152 | true 1153 | true 1154 | true 1155 | 2019-12-05T04:38:32.7808454 1156 | 2019-12-06T05:41:05.3597939 1157 | 77.389849 1158 | 28.6282231 1159 | Unnamed Road, Chhijarsi, Sector 63, Noida, Uttar Pradesh 201307, India 1160 | 269 1161 | 36 1162 | 1163 | <40> 1164 | 2012 1165 | Aakankshi Gupta 1166 | 
aakankshi@gmail.com 1167 | zxcvbnm1 1168 | 1169 | 4c02ee6e-2cb5-4b75-888f-4adb7c790dad 1170 | 1171 | 1172 | 0 1173 | 0 1174 | 1175 | 0 1176 | 1 1177 | 1 1178 | 1179 | 1180 | false 1181 | false 1182 | false 1183 | false 1184 | 2019-12-05T04:35:42.6189131 1185 | 2019-12-05T04:35:42.6189131 1186 | 0 1187 | 0 1188 | 1189 | 3 1190 | 0 1191 | 1192 | <41> 1193 | 2011 1194 | Aankashi 1195 | alokpatel@gmail.com 1196 | pankaj@123 1197 | its me 1198 | 4516169a-58e3-4d04-a355-8c608be5d583 1199 | 1200 | New Delhi 1201 | 0 1202 | 0 1203 | 2019-12-05T00:00:00 1204 | 1 1205 | 1 1206 | 1 1207 | Images/9b0f8891-98ec-4b10-91b8-a9f8fbb6d108.png 1208 | Images/fe2c72d4-8eb6-47e6-9e8e-5d1e15e2b9ea.png 1209 | false 1210 | false 1211 | false 1212 | false 1213 | 2019-11-30T05:36:52.1332523 1214 | 2019-12-05T05:21:49.0948144 1215 | 0 1216 | 0 1217 | 1218 | 4 1219 | 0 1220 | 1221 | <42> 1222 | 2010 1223 | Test 10 1224 | test02@gmail.com 1225 | 123456 1226 | 1227 | ba805b5d-f38f-4a33-9fcb-60f30923479d 1228 | 1229 | 1230 | 0 1231 | 0 1232 | 1233 | 0 1234 | 1 1235 | 1 1236 | 1237 | 1238 | false 1239 | false 1240 | false 1241 | false 1242 | 2019-11-27T10:41:07.8215121 1243 | 2019-11-27T10:41:07.8215122 1244 | 0 1245 | 0 1246 | 1247 | 3 1248 | 0 1249 | 1250 | <43> 1251 | 2009 1252 | Test 9 1253 | test10@gmail.com 1254 | 123456 1255 | 1256 | 1fa26ffa-465e-499f-ae9d-3e36fb94a0ea 1257 | 1258 | 1259 | 0 1260 | 0 1261 | 1262 | 0 1263 | 1 1264 | 1 1265 | 1266 | 1267 | false 1268 | false 1269 | false 1270 | false 1271 | 2019-11-27T10:20:10.5816545 1272 | 2019-11-27T10:20:10.5816545 1273 | 0 1274 | 0 1275 | 1276 | 0 1277 | 0 1278 | 1279 | <44> 1280 | 2008 1281 | Test 8 1282 | test3@test.com 1283 | 123456 1284 | 1285 | 822e8f5c-74ba-41f9-911e-9e5a47351dc5 1286 | 1287 | 1288 | 0 1289 | 0 1290 | 1291 | 0 1292 | 1 1293 | 1 1294 | 1295 | 1296 | false 1297 | false 1298 | false 1299 | false 1300 | 2019-11-27T10:09:42.6969868 1301 | 2019-11-27T10:09:42.6969869 1302 | 0 1303 | 0 1304 | 1305 | 0 1306 | 0 1307 | 
1308 | <45> 1309 | 2007 1310 | Neeraj Singh 1311 | neirajsingh100@gmail.com 1312 | 100569704678350206382 1313 | 1314 | 47d25887-1933-4cbc-a4eb-6cc829c9cad6 1315 | 1316 | 1317 | 0 1318 | 0 1319 | 1320 | 0 1321 | 1 1322 | 1 1323 | /images/9c2dbeed-5ce9-4411-a549-255dc35da175.png 1324 | Images/d3cfe52d-93b0-4f35-87c9-d0c2f18f8daa.png 1325 | false 1326 | false 1327 | false 1328 | false 1329 | 2019-11-26T12:42:18.6778716 1330 | 2019-12-05T05:20:40.9827126 1331 | 0 1332 | 0 1333 | 1334 | 2 1335 | 0 1336 | 1337 | <46> 1338 | 2006 1339 | Hr. Niels Henriksen 1340 | contact@adequateinfosoft.com 1341 | 354343138574152 1342 | i am software developer 1343 | 347d684e-da9b-450e-bc94-cd5a99b04234 1344 | 1345 | Jaypee Greens Pari Chowk, Tugalpur Village, Greater Noida, Uttar Pradesh 201310, India 1346 | 0 1347 | 0 1348 | 2019-12-28T06:04:42 1349 | 1 1350 | 1 1351 | 1 1352 | 1353 | Images/ac6a163f-af9f-46db-8327-94ab870000f4.png 1354 | false 1355 | false 1356 | false 1357 | false 1358 | 2019-11-26T05:58:34.894975 1359 | 2019-12-05T05:20:37.5393081 1360 | 77.3790241 1361 | 28.6302086 1362 | H15, H Block, Sector 63, Noida, Uttar Pradesh 201301, India 1363 | 11 1364 | 0 1365 | 1366 | <47> 1367 | 2005 1368 | Test 6 1369 | pankajadequate@gmail.com 1370 | sndsjkdsaj161564sasa 1371 | 1372 | 7a4ae2e1-5b65-4aad-96f0-43336fad5a3b 1373 | 1374 | 1375 | 0 1376 | 0 1377 | 1378 | 0 1379 | 1 1380 | 1 1381 | 1382 | Images/b372a834-231a-4ea1-beef-5c3c3fb33ef2.png 1383 | false 1384 | false 1385 | false 1386 | false 1387 | 2019-11-25T13:12:47.6690112 1388 | 2019-12-05T05:20:34.2935856 1389 | 0 1390 | 0 1391 | 1392 | 2 1393 | 0 1394 | 1395 | <48> 1396 | 2004 1397 | Test 5 1398 | pankajws@gmail.com 1399 | Pankaj@123 1400 | 1401 | 1fd08288-57aa-41d9-ae44-29fcbb0a78a6 1402 | 1403 | 1404 | 0 1405 | 0 1406 | 1407 | 0 1408 | 1 1409 | 1 1410 | 1411 | Images/519c95d8-79d3-4d18-bac8-fc75b817eac9.png 1412 | false 1413 | false 1414 | false 1415 | false 1416 | 2019-11-25T11:29:06.938541 1417 | 
2019-12-05T05:20:30.604524 1418 | 0 1419 | 0 1420 | 1421 | 4 1422 | 0 1423 | 1424 | <49> 1425 | 2 1426 | rajat Saxena 1427 | rajat.nirmal@gmail.com 1428 | test 1429 | abcd 1430 | 00fb57db-10ad-4661-b3e0-7366c6698ac0 1431 | 1432 | 3465 1433 | 8.7977 1434 | 5.8348759 1435 | 2019-11-17T16:56:35.8 1436 | 1 1437 | 1 1438 | 1 1439 | Images/0f0ab4a5-4f00-4275-b2dc-be3b157f5e53.png 1440 | Images/0153a968-6dd0-4c9c-9b92-8995e99985d3.png 1441 | false 1442 | false 1443 | false 1444 | false 1445 | 2019-11-17T16:56:35.8 1446 | 2019-12-05T05:21:02.1337931 1447 | 0 1448 | 0 1449 | 1450 | 43 1451 | 0 1452 | 1453 | <50> 1454 | 3 1455 | Pankaj Patel 1456 | Pankaj@gmail.com 1457 | 123456 1458 | 1459 | f6b4bcc8-448b-4b65-84cb-48e9569c5cfe 1460 | 1461 | 1462 | 0 1463 | 0 1464 | 1465 | 0 1466 | 1 1467 | 1 1468 | Images/0eff867e-62ec-43a7-a535-7e83b6b27a30.png 1469 | Images/4bbe816b-a60c-419f-a87b-34508c633d30.png 1470 | false 1471 | false 1472 | false 1473 | false 1474 | 0001-01-01T00:00:00 1475 | 2019-12-05T13:25:01.977078 1476 | 0 1477 | 0 1478 | 1479 | 62 1480 | 0 1481 | 1482 | <51> 1483 | 1003 1484 | Test 2 1485 | pankajabc@gmail.com 1486 | Pankaj@123 1487 | An ordinary bhartiya.. 
1488 | c8f53478-4cfb-4382-8531-b44b6d20cd8c 1489 | 1490 | Jaipur Engineering College Rd, Kukas, Rajasthan 302028, India 1491 | 0 1492 | 0 1493 | 2019-12-31T11:06:38 1494 | 2 1495 | 1 1496 | 1 1497 | Images/07cac01f-2a12-4125-ba8b-8eff42333925.png 1498 | Images/db6fce41-9924-4c78-9099-c2890e6f0cac.png 1499 | false 1500 | false 1501 | false 1502 | false 1503 | 0001-01-01T00:00:00 1504 | 2019-12-23T13:43:27.5816869 1505 | 77.389849 1506 | 28.6282231 1507 | Unnamed Road, Chhijarsi, Sector 63, Noida, Uttar Pradesh 201307, India 1508 | 16 1509 | 0 1510 | 1511 | <52> 1512 | 1004 1513 | Test 2 1514 | pankajabcd@gmail.com 1515 | Pankaj@123 1516 | 1517 | 3f92cdef-1d8a-4f08-8468-9938dc9284ce 1518 | 1519 | 1520 | 0 1521 | 0 1522 | 1523 | 0 1524 | 1 1525 | 1 1526 | 1527 | Images/7da06d0e-e110-44e9-b417-4b014fe8a889.png 1528 | false 1529 | false 1530 | false 1531 | false 1532 | 0001-01-01T00:00:00 1533 | 2019-12-05T05:19:23.3650433 1534 | 77.3674236 1535 | 28.6260665 1536 | 35, Block A, Industrial Area, Sector 62, Noida, Uttar Pradesh 201309, India 1537 | 3 1538 | 0 1539 | 1540 | <53> 1541 | 1005 1542 | Test 3 1543 | test@test.com 1544 | 123456 1545 | 1546 | cc2ed6c5-0a59-4aca-9b8e-a686e0a4fa03 1547 | 1548 | 1549 | 0 1550 | 0 1551 | 1552 | 0 1553 | 1 1554 | 1 1555 | 1556 | Images/6d9afca5-3e79-4979-9091-a83d8df7b388.png 1557 | false 1558 | false 1559 | false 1560 | false 1561 | 0001-01-01T00:00:00 1562 | 2019-12-05T05:19:29.704463 1563 | 0 1564 | 0 1565 | 1566 | 44 1567 | 0 1568 | 1569 | <54> 1570 | 1006 1571 | Test 4 1572 | test1@test.com 1573 | 123456 1574 | 1575 | f33f3325-bb08-41a5-b545-d2b5c443720f 1576 | 1577 | 1578 | 0 1579 | 0 1580 | 1581 | 0 1582 | 1 1583 | 1 1584 | Images/b3f2677f-10a8-46e6-8d48-cfba2cc1e0b5.png 1585 | Images/e9845113-f874-469d-8e89-53a86f351887.png 1586 | false 1587 | false 1588 | false 1589 | false 1590 | 0001-01-01T00:00:00 1591 | 2019-12-05T05:22:11.8963884 1592 | 0 1593 | 0 1594 | 1595 | 1 1596 | 0 1597 | 1598 | 
-------------------------------------------------------------------------------- /tests/files/foo.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapy/xtractmime/26757354487c1c8ce8c8810fd0404f6c76e8519e/tests/files/foo.zip -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest-cov>=2.8 2 | pytest>=5.4 -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from xtractmime import ( 4 | _find_unknown_mimetype, 5 | _sniff_mislabled_binary, 6 | _sniff_mislabled_feed, 7 | extract_mime, 8 | is_binary_data, 9 | ) 10 | 11 | 12 | class TestMain: 13 | 14 | sample_xml1 = b""" 15 | 16 | 17 | 18 | 19 | XYZ 20 | A 21 | 22 | """ 23 | 24 | sample_xml2 = b""" 25 | 26 | 27 | 28 | XYZ 29 | A 30 | 31 | """ 32 | 33 | sample_xml3 = b""" 34 | 35 | 36 | 37 | XYZ 38 | A 39 | 40 | """ 41 | 42 | sample_xml4 = b""" 43 | 48 | """ 49 | 50 | sample_xml5 = b""" 51 | 56 | """ 57 | 58 | extra_types = ((b"test", b"\xff\xff\xff\xff", None, b"text/test"),) 59 | 60 | @pytest.mark.parametrize( 61 | "body,content_types,http_origin,no_sniff,extra_types,supported_types,expected", 62 | [ 63 | ("foo.pdf", None, True, False, None, None, b"application/pdf"), 64 | ("foo.gif", (b"image/gif",), True, True, None, None, b"image/gif"), 65 | ("foo.txt", (b"text/plain",), True, False, None, None, b"text/plain"), 66 | ("foo.xml", (b"text/xml",), True, False, None, None, b"text/xml"), 67 | ("foo.html", (b"text/html",), True, False, None, None, b"text/html"), 68 | ("foo.gif", (b"image/gif",), True, False, None, (b"image/gif",), b"image/gif"), 69 | ("foo.mp4", (b"video/mp4",), True, False, None, (b"video/mp4",), b"video/mp4"), 70 | (b"GIF87a", 
(b"image/gif",), True, False, None, (b"image/x-icon",), b"image/gif"), 71 | (b"ID3", (b"audio/mpeg",), True, False, None, (b"audio/basic",), b"audio/mpeg"), 72 | (b"\x00\x00\x00\x00", (b"text/test",), True, False, None, None, b"text/test"), 73 | (b"", (b"text/html; charset=utf-8",), True, False, None, None, b"text/html"), 74 | (b"", (b"text/htmlpdfthing",), True, False, None, None, b"text/htmlpdfthing"), 75 | (b"", None, True, False, None, None, b"text/plain"), 76 | ( 77 | b"test", 78 | None, 79 | True, 80 | False, 81 | extra_types, 82 | None, 83 | b"text/test", 84 | ), 85 | ( 86 | b"TEST", 87 | None, 88 | True, 89 | False, 90 | extra_types, 91 | None, 92 | b"text/plain", 93 | ), 94 | # Even if the body is binary, if the Content-Type says it is text, 95 | # we interpret it as text, as long as the Content-Type is not one 96 | # of the 4 affected by the Apache bug. 97 | # 98 | # https://mimesniff.spec.whatwg.org/#interpreting-the-resource-metadata 99 | *( 100 | ( 101 | b"\x00\x01\xff", 102 | (supplied_content_type,), 103 | True, 104 | False, 105 | None, 106 | None, 107 | expected_content_type, 108 | ) 109 | for (supplied_content_type, expected_content_type) in ( 110 | (b"text/json", b"text/json"), 111 | *( 112 | (supplied_content_type, b"text/plain") 113 | for supplied_content_type in ( 114 | b"text/plain; charset=Iso-8859-1", 115 | b"text/plain; charset=utf-8", 116 | b"text/plain; charset=windows-1252", 117 | ) 118 | ), 119 | *( 120 | (supplied_content_type, b"application/octet-stream") 121 | for supplied_content_type in ( 122 | b"text/plain", 123 | b"text/plain; charset=ISO-8859-1", 124 | b"text/plain; charset=iso-8859-1", 125 | b"text/plain; charset=UTF-8", 126 | ) 127 | ), 128 | ) 129 | ), 130 | # Malformed MIME type 131 | *( 132 | (b"...", (mime_type,), True, False, None, None, b"text/plain") 133 | for mime_type in ( 134 | b"javascript charset=UTF-8", 135 | b"a/b/c", 136 | b"a/[", 137 | b"[/a", 138 | ) 139 | ), 140 | ], 141 | ) 142 | def test_extract_mime( 143 | 
self, body, content_types, http_origin, no_sniff, extra_types, supported_types, expected 144 | ): 145 | if isinstance(body, str): 146 | with open(f"tests/files/{body}", "rb") as input_file: 147 | body = input_file.read() 148 | assert ( 149 | extract_mime( 150 | body, 151 | content_types=content_types, 152 | http_origin=http_origin, 153 | no_sniff=no_sniff, 154 | extra_types=extra_types, 155 | supported_types=supported_types, 156 | ) 157 | == expected 158 | ) 159 | 160 | @pytest.mark.parametrize( 161 | "input_bytes,expected", 162 | [ 163 | ("foo.txt", b"text/plain"), 164 | ("foo.exe", b"application/octet-stream"), 165 | (b"\xfe\xff", b"text/plain"), 166 | ], 167 | ) 168 | def test_sniff_mislabled_binary(self, input_bytes, expected): 169 | if isinstance(input_bytes, str): 170 | with open(f"tests/files/{input_bytes}", "rb") as input_file: 171 | input_bytes = input_file.read() 172 | assert _sniff_mislabled_binary(input_bytes) == expected 173 | 174 | @pytest.mark.parametrize( 175 | "input_bytes,sniff_scriptable,extra_types,expected", 176 | [ 177 | ("foo.pdf", True, None, b"application/pdf"), 178 | ("foo.gif", False, None, b"image/gif"), 179 | ("foo.mp4", False, None, b"video/mp4"), 180 | ("foo.zip", False, None, b"application/zip"), 181 | ("foo.txt", False, None, b"text/plain"), 182 | ("foo.exe", False, None, b"application/octet-stream"), 183 | (b"test", False, ((b"test", b"\xff\xff\xff\xff", None, b"text/test"),), b"text/test"), 184 | ], 185 | ) 186 | def test_find_unknown_mimetype(self, input_bytes, sniff_scriptable, extra_types, expected): 187 | if isinstance(input_bytes, str): 188 | with open(f"tests/files/{input_bytes}", "rb") as input_file: 189 | input_bytes = input_file.read() 190 | assert _find_unknown_mimetype(input_bytes, sniff_scriptable, extra_types) == expected 191 | 192 | @pytest.mark.parametrize( 193 | "input_bytes,supplied_type,expected", 194 | [ 195 | ("foo.xml", b"text/xml", b"text/xml"), 196 | (sample_xml1, b"application/rss+xml", 
b"application/rss+xml"), 197 | (sample_xml2, b"application/atom+xml", b"application/atom+xml"), 198 | (sample_xml3, b"text/xml", b"text/xml"), 199 | (sample_xml4, b"application/rss+xml", b"application/rss+xml"), 200 | (sample_xml5, b"application/rss+xml", b"application/rss+xml"), 201 | (b"test", b"text/test", b"text/test"), 202 | (b" ", None, None), 203 | (b"<", None, None), 204 | (b"": 103 | index += 3 104 | loop_break = True 105 | break 106 | 107 | index += 1 108 | 109 | if loop_break: 110 | break 111 | 112 | if input_bytes[index : index + 1] == b"!": 113 | index += 1 114 | while True: 115 | if not input_bytes[index : index + 1]: 116 | return supplied_type 117 | 118 | if input_bytes[index : index + 1] == b">": 119 | index += 1 120 | loop_break = True 121 | break 122 | 123 | index += 1 124 | 125 | if loop_break: 126 | break 127 | 128 | if input_bytes[index : index + 1] == b"?": 129 | index += 1 130 | while True: 131 | if not input_bytes[index : index + 1]: 132 | return supplied_type 133 | 134 | if input_bytes[index : index + 2] == b"?>": 135 | index += 2 136 | loop_break = True 137 | break 138 | 139 | index += 1 140 | 141 | if loop_break: 142 | break 143 | 144 | if input_bytes[index : index + 3] == b"rss": 145 | return b"application/rss+xml" 146 | 147 | if input_bytes[index : index + 4] == b"feed": 148 | return b"application/atom+xml" 149 | 150 | if input_bytes[index : index + 7] == b"rdf:RDF": 151 | index += 7 152 | while True: 153 | if not input_bytes[index : index + 1]: 154 | return supplied_type 155 | 156 | if input_bytes[index : index + 24] == b"http://purl.org/rss/1.0/": 157 | index += 24 158 | while True: 159 | if not input_bytes[index : index + 1]: 160 | return supplied_type 161 | 162 | if ( 163 | input_bytes[index : index + 43] 164 | == b"http://www.w3.org/1999/02/22-rdf-syntax-ns#" 165 | ): 166 | return b"application/rss+xml" 167 | 168 | index += 1 169 | 170 | if ( 171 | input_bytes[index : index + 43] 172 | == 
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#" 173 | ): 174 | index += 43 175 | while True: 176 | if not input_bytes[index : index + 1]: 177 | return supplied_type 178 | 179 | if input_bytes[index : index + 24] == b"http://purl.org/rss/1.0/": 180 | return b"application/rss+xml" 181 | 182 | index += 1 183 | 184 | index += 1 185 | 186 | return supplied_type 187 | 188 | return supplied_type 189 | 190 | 191 | _TOKEN = rb"^\s*[-!#$%&'*+.0-9A-Z^_`a-z{|}~]+\s*$" 192 | 193 | 194 | def _is_valid_mime_type(mime_type): 195 | """Return True if the specified MIME type is valid as per RFC 2045, or 196 | False otherwise. 197 | 198 | Only the type and subtype are validated, parameters are ignored. 199 | """ 200 | parts = mime_type.split(b"/", maxsplit=1) 201 | if len(parts) < 2: 202 | return False 203 | _type, subtype_and_params = parts 204 | if not re.match(_TOKEN, _type): 205 | return False 206 | subtype = subtype_and_params.split(b";", maxsplit=1)[0] 207 | if not re.match(_TOKEN, subtype): 208 | return False 209 | return True 210 | 211 | 212 | def extract_mime( 213 | body: bytes, 214 | *, 215 | content_types: Optional[Tuple[bytes]] = None, 216 | http_origin: bool = True, 217 | no_sniff: bool = False, 218 | extra_types: Optional[Tuple[Tuple[bytes, bytes, Optional[Set[bytes]], bytes], ...]] = None, 219 | supported_types: Optional[Set[bytes]] = None, 220 | ) -> Optional[bytes]: 221 | extra_types = extra_types or tuple() 222 | supplied_type = content_types[-1] if content_types else b"" 223 | check_for_apache = http_origin and supplied_type in _APACHE_TYPES 224 | if not _is_valid_mime_type(supplied_type): 225 | supplied_type = b"" 226 | supplied_type = supplied_type.split(b";")[0].strip().lower() 227 | resource_header = memoryview(body)[:RESOURCE_HEADER_BUFFER_LENGTH] 228 | 229 | if supplied_type in (b"", b"unknown/unknown", b"application/unknown", b"*/*"): 230 | return _find_unknown_mimetype(resource_header, not no_sniff, extra_types) 231 | 232 | if no_sniff: 233 | return 
supplied_type 234 | 235 | if check_for_apache: 236 | return _sniff_mislabled_binary(resource_header) 237 | 238 | if supplied_type.endswith(b"+xml") or supplied_type in {b"text/xml", b"application/xml"}: 239 | return supplied_type 240 | 241 | if is_html_mime_type(supplied_type): 242 | return _sniff_mislabled_feed(resource_header, supplied_type) 243 | 244 | if supported_types: 245 | if is_image_mime_type(supplied_type): 246 | matched_type = get_image_mime(resource_header) 247 | if matched_type in supported_types: 248 | return matched_type 249 | 250 | if is_audio_video_mime_type(supplied_type): 251 | matched_type = get_audio_video_mime(resource_header) 252 | if matched_type in supported_types: 253 | return matched_type 254 | 255 | return supplied_type 256 | -------------------------------------------------------------------------------- /xtractmime/_patterns.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Set, Tuple 2 | 3 | 4 | #: Section 3 5 | #: https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#terminology # noqa: E501 6 | BINARY_BYTES = tuple( 7 | bytes.fromhex(byte) 8 | for byte in ( 9 | "00", 10 | "01", 11 | "02", 12 | "03", 13 | "04", 14 | "05", 15 | "06", 16 | "07", 17 | "08", 18 | "0B", 19 | "0E", 20 | "0F", 21 | "10", 22 | "11", 23 | "12", 24 | "13", 25 | "14", 26 | "15", 27 | "16", 28 | "17", 29 | "18", 30 | "19", 31 | "1A", 32 | "1C", 33 | "1D", 34 | "1E", 35 | "1F", 36 | ) 37 | ) 38 | WHITESPACE_BYTES = {b"\t", b"\r", bytes.fromhex("0c"), b"\n", b" "} 39 | 40 | #: Section 4.6 41 | #: https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#mime-type-groups # noqa: E501 42 | FONT_TYPES = [ 43 | b"application/font-cff", 44 | b"application/font-off", 45 | b"application/font-sfnt", 46 | b"application/font-ttf", 47 | b"application/font-woff", 48 | b"application/vnd.ms-fontobject", 49 | 
b"application/vnd.ms-opentype", 50 | ] 51 | ARCHIVE_TYPES = [ 52 | b"application/x-rar-compressed", 53 | b"application/zip", 54 | b"application/x-gzip", 55 | ] 56 | JAVASCRIPT_TYPES = [ 57 | b"application/ecmascript", 58 | b"application/javascript", 59 | b"application/x-ecmascript", 60 | b"application/x-javascript", 61 | b"text/ecmascript", 62 | b"text/javascript", 63 | b"text/javascript1.0", 64 | b"text/javascript1.1", 65 | b"text/javascript1.2", 66 | b"text/javascript1.3", 67 | b"text/javascript1.4", 68 | b"text/javascript1.5", 69 | b"text/jscript", 70 | b"text/livescript", 71 | b"text/x-ecmascript", 72 | b"text/x-javascript", 73 | ] 74 | 75 | #: Section 5.1, step 2 76 | #: https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#interpreting-the-resource-metadata # noqa: E501 77 | _APACHE_TYPES = [ 78 | b"text/plain", 79 | b"text/plain; charset=ISO-8859-1", 80 | b"text/plain; charset=iso-8859-1", 81 | b"text/plain; charset=UTF-8", 82 | ] 83 | 84 | #: Section 6.1, step 1 85 | #: https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#matching-an-image-type-pattern # noqa: E501 86 | IMAGE_PATTERNS = ( 87 | (bytes.fromhex("00000100"), bytes.fromhex("ffffffff"), None, b"image/x-icon"), 88 | (bytes.fromhex("00000200"), bytes.fromhex("ffffffff"), None, b"image/x-icon"), 89 | (b"BM", bytes.fromhex("ffff"), None, b"image/bmp"), 90 | ( 91 | b"GIF87a", 92 | bytes.fromhex("ffffffffffff"), 93 | None, 94 | b"image/gif", 95 | ), 96 | ( 97 | b"GIF89a", 98 | bytes.fromhex("ffffffffffff"), 99 | None, 100 | b"image/gif", 101 | ), 102 | ( 103 | b"RIFF" + bytes.fromhex("00000000") + b"WEBPVP", 104 | bytes.fromhex("ffffffff00000000ffffffffffff"), 105 | None, 106 | b"image/webp", 107 | ), 108 | ( 109 | bytes.fromhex("89") + b"PNG\r\n" + bytes.fromhex("1a") + b"\n", 110 | bytes.fromhex("ffffffffffffffff"), 111 | None, 112 | b"image/png", 113 | ), 114 | ( 115 | bytes.fromhex("ffd8ff"), 116 | 
bytes.fromhex("ffffff"), 117 | None, 118 | b"image/jpeg", 119 | ), 120 | ) 121 | 122 | #: Section 6.2, step 1 123 | #: https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#matching-an-audio-or-video-type-pattern # noqa: E501 124 | AUDIO_VIDEO_PATTERNS = ( 125 | ( 126 | b".snd", 127 | bytes.fromhex("ffffffff"), 128 | None, 129 | b"audio/basic", 130 | ), 131 | ( 132 | b"FORM" + bytes.fromhex("00000000") + b"AIFF", 133 | bytes.fromhex("ffffffff00000000ffffffff"), 134 | None, 135 | b"audio/aiff", 136 | ), 137 | ( 138 | b"ID3", 139 | bytes.fromhex("ffffff"), 140 | None, 141 | b"audio/mpeg", 142 | ), 143 | ( 144 | b"OggS" + bytes.fromhex("00"), 145 | bytes.fromhex("ffffffffff"), 146 | None, 147 | b"application/ogg", 148 | ), 149 | ( 150 | b"MThd" + bytes.fromhex("00000006"), 151 | bytes.fromhex("ffffffffffffffff"), 152 | None, 153 | b"audio/midi", 154 | ), 155 | ( 156 | b"RIFF" + bytes.fromhex("00000000") + b"AVI ", 157 | bytes.fromhex("ffffffff00000000ffffffff"), 158 | None, 159 | b"video/avi", 160 | ), 161 | ( 162 | b"RIFF" + bytes.fromhex("00000000") + b"WAVE", 163 | bytes.fromhex("ffffffff00000000ffffffff"), 164 | None, 165 | b"audio/wave", 166 | ), 167 | ) 168 | 169 | #: Section 6.3, step 1 170 | #: https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#matching-a-font-type-pattern # noqa: E501 171 | FONT_PATTERNS = ( 172 | ( 173 | ( 174 | bytes.fromhex("00000000000000000000000000000000000000000000000000000000000000000000") 175 | + b"LP" 176 | ), 177 | ( 178 | bytes.fromhex( 179 | "00000000000000000000000000000000000000000000000000000000000000000000ffff" 180 | ) 181 | ), 182 | None, 183 | b"application/vnd.ms-fontobject", 184 | ), 185 | ( 186 | bytes.fromhex("00010000"), 187 | bytes.fromhex("ffffffff"), 188 | None, 189 | b"font/ttf", 190 | ), 191 | (b"OTTO", bytes.fromhex("ffffffff"), None, b"font/otf"), 192 | ( 193 | b"ttcf", 194 | bytes.fromhex("ffffffff"), 195 | None, 196 | 
#: Section 6.4, step 1
#: https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#matching-an-archive-type-pattern  # noqa: E501
ARCHIVE_PATTERNS = (
    (bytes.fromhex("1f8b08"), bytes.fromhex("ffffff"), None, b"application/x-gzip"),
    (
        b"PK" + bytes.fromhex("0304"),
        bytes.fromhex("ffffffff"),
        None,
        b"application/zip",
    ),
    (
        b"Rar " + bytes.fromhex("1a0700"),
        bytes.fromhex("ffffffffffffff"),
        None,
        b"application/x-rar-compressed",
    ),
)

#: Section 7.1, step 1
#: https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#identifying-a-resource-with-an-unknown-mime-type  # noqa: E501
# NOTE(review): the TEXT_PATTERNS definition that belongs here, and everything
# between it and the body of is_match_mime_pattern() (including the start of
# xtractmime/_utils.py), is truncated in this copy of the sources. It must be
# restored from the upstream package; it is deliberately not guessed at here.


# NOTE(review): the ``def`` line below is reconstructed from the parameter
# names used in the body and from the four positional arguments every caller
# passes -- confirm the annotations and the ``lstrip`` default upstream.
def is_match_mime_pattern(
    input_bytes: bytes,
    byte_pattern: bytes,
    pattern_mask: bytes,
    lstrip: Optional[Set[bytes]] = None,
) -> bool:
    """Return True if ``input_bytes`` starts with ``byte_pattern`` under a mask.

    Each input byte is ANDed with the mask byte at the same pattern position
    and the result is compared with the pattern byte.

    Args:
        input_bytes: the data to inspect.
        byte_pattern: the expected (already masked) byte values.
        pattern_mask: one mask byte per pattern byte.
        lstrip: single-byte values that may be skipped at the start of
            ``input_bytes`` before matching begins; falsy to skip nothing.

    Returns:
        True when every masked input byte equals its pattern byte; False when
        a byte differs or when there are not enough input bytes to compare.

    Raises:
        ValueError: if ``byte_pattern`` and ``pattern_mask`` differ in length.
    """
    input_size = len(input_bytes)
    pattern_size = len(byte_pattern)

    if pattern_size != len(pattern_mask):
        raise ValueError("pattern's length should match mask's length")

    if input_size < pattern_size:
        return False

    start = 0
    if lstrip:
        # One-byte slices are compared because ``lstrip`` holds bytes objects.
        while start < input_size and input_bytes[start : start + 1] in lstrip:
            start += 1

    # Skipping leading bytes can leave fewer bytes than the pattern needs.
    # Without this guard the comparison would index past the end of the input
    # and raise IndexError instead of reporting "no match".
    if input_size - start < pattern_size:
        return False

    return all(
        (data_byte & mask_byte) == pattern_byte
        for data_byte, mask_byte, pattern_byte in zip(
            input_bytes[start : start + pattern_size], pattern_mask, byte_pattern
        )
    )


def is_mp4_signature(input_bytes: bytes) -> bool:
    """Return True if ``input_bytes`` starts with an MP4 ``ftyp`` box.

    The first four bytes are read as a big-endian box size, bytes 4-7 must be
    ``b"ftyp"``, and a brand starting with ``b"mp4"`` must appear either at
    offset 8 or at one of the 4-byte slots from offset 16 up to the box size.

    Returns False when fewer than 12 bytes are available, when the input is
    shorter than the declared box size, or when the box size is not a multiple
    of four.
    """
    input_size = len(input_bytes)
    if input_size < 12:
        return False

    box_size = unpack(">I", input_bytes[0:4])[0]

    if input_size < box_size or box_size % 4:
        return False

    if input_bytes[4:8] != b"ftyp":
        return False

    if input_bytes[8:11] == b"mp4":
        return True

    # Offsets 12-15 are skipped; brands are then scanned in 4-byte steps.
    return any(
        input_bytes[offset : offset + 3] == b"mp4" for offset in range(16, box_size, 4)
    )
def parse_vint_number_size(input_bytes: memoryview) -> int:
    """Return how many bytes the vint at the start of ``input_bytes`` occupies.

    The position of the first set bit of the first byte gives the size: the
    top bit means 1, the next bit 2, and so on. The answer is capped at 8 and
    at the number of bytes available. The caller uses it to advance its index
    while scanning a WebM header.

    Based on https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#signature-for-webm,  # noqa: E501
    Only the size is computed; the "parsed number" of the standard is not
    needed by "is_webm_signature()" and is therefore left out.
    """
    available = len(input_bytes)
    cap = min(8, available)
    leading = input_bytes[0]

    size = 1
    bit = 0x80
    while size < cap:
        if leading & bit:
            return size
        bit >>= 1
        size += 1
    return cap


def is_webm_signature(input_bytes: bytes) -> bool:
    """Tell whether ``input_bytes`` looks like the start of a WebM file.

    The data must begin with the bytes 1A 45 DF A3. Offsets 4 to 37 are then
    scanned for the byte pair 42 82; the first pair found must be followed by
    a vint and then the literal ``b"webm"``, with at least one more byte after
    it, otherwise the scan stops.

    Based on https://mimesniff.spec.whatwg.org/commit-snapshots/609a3a3c935fbb805b46cf3d90768d695a1dcff2/#signature-for-webm,  # noqa: E501
    with one deliberate change taken from
    https://github.com/whatwg/mimesniff/issues/93: the index can never exceed
    38 while the input may be as long as 1445 bytes, so the "less than" of
    line 6.1.5 is implemented as "greater than or equal to".
    """
    total = len(input_bytes)
    if total < 4 or input_bytes[:4] != b"\x1a\x45\xdf\xa3":
        return False

    position = 4
    scan_end = min(total, 38)
    while position < scan_end:
        if input_bytes[position : position + 2] == b"\x42\x82":
            position += 2
            if position >= total:
                break

            position += parse_vint_number_size(memoryview(input_bytes)[position:])
            if position >= total - 4:
                break

            if input_bytes[position : position + 4] == b"webm":
                return True
        position += 1

    return False
def match_mp3_header(input_bytes: bytes, input_size: int, index: int) -> bool:
    """Return True if an acceptable MP3 frame header starts at ``index``.

    Args:
        input_bytes: the data being sniffed.
        input_size: the number of bytes in ``input_bytes``.
        index: offset at which the 4-byte header is expected.

    Returns:
        True when the byte at ``index`` is 0xFF, the top three bits of the
        next byte are set, the layer field equals 1, the bit-rate field is not
        15 and the sample-rate field is not 3. False otherwise, including when
        fewer than four bytes are available from ``index`` onwards.
    """
    # The header must fit between ``index`` and the end of the data. Checking
    # only ``input_size`` would allow reads past the end for a header that
    # starts close to the end of the input.
    if input_size - index < 4:
        return False

    sync_byte = input_bytes[index]
    flags_byte = input_bytes[index + 1]
    rates_byte = input_bytes[index + 2]

    if sync_byte != 0xFF or (flags_byte & 0xE0) != 0xE0:
        return False

    # Only layer-field value 1 is accepted (``4 - layer == 3``); this also
    # rejects the value 0.
    layer = (flags_byte & 0x06) >> 1
    if layer != 1:
        return False

    bit_rate = (rates_byte & 0xF0) >> 4
    if bit_rate == 15:
        return False

    sample_rate = (rates_byte & 0x0C) >> 2
    return sample_rate != 3


def parse_mp3_frame(input_bytes: bytes) -> Tuple[int, int, int, int]:
    """Decode the frame header found in the first three bytes of ``input_bytes``.

    Returns:
        A ``(version, bit_rate, freq, pad)`` tuple:

        * ``version``: bits 3-4 of byte 1.
        * ``bit_rate``: looked up by the top four bits of byte 2 in
          ``MP3_RATES`` when ``version`` is odd, in ``MP25_RATES`` otherwise.
        * ``freq``: ``SAMPLE_RATES`` entry selected by bits 2-3 of byte 2,
          halved when ``version`` is 2 and quartered when it is 0.
        * ``pad``: bit 1 of byte 2.

    The header is not validated here; callers are expected to have checked it
    with ``match_mp3_header()`` first.
    """
    version = (input_bytes[1] & 24) >> 3
    bit_rate_index = (input_bytes[2] & 240) >> 4

    if version & 1:
        bit_rate = MP3_RATES[bit_rate_index]
    else:
        bit_rate = MP25_RATES[bit_rate_index]

    sample_rate_index = (input_bytes[2] & 12) >> 2
    freq = SAMPLE_RATES[sample_rate_index]

    if version == 2:
        freq >>= 1
    elif version == 0:
        freq >>= 2

    pad = (input_bytes[2] & 2) >> 1

    return version, bit_rate, freq, pad
def mp3_framesize(version: int, bit_rate: int, freq: int, pad: int) -> int:
    """Return the size in bytes of a frame with the given header values.

    Args:
        version: version field of the header; odd values use a scale of 144,
            even values a scale of 72.
        bit_rate: bit rate as returned by ``parse_mp3_frame()``.
        freq: sample frequency as returned by ``parse_mp3_frame()``; must not
            be zero.
        pad: padding flag; one extra byte is added when it is truthy.

    Returns:
        ``bit_rate * scale / freq`` rounded down, plus the padding byte.
    """
    scale = 144 if version & 1 else 72

    # Floor division keeps the computation in integers instead of going
    # through a float and truncating afterwards.
    size = int(bit_rate * scale // freq)

    if pad:
        size += 1

    return size


def is_mp3_non_ID3_signature(input_bytes: bytes) -> bool:
    """Return True if the input bytes belong to an MP3 file without ID3
    metadata, or False otherwise.

    This implementation does not match with standards due to various
    problems with the algorithm according to https://github.com/whatwg/mimesniff/issues/70.

    The current implementation follows
    https://dxr.mozilla.org/mozilla-central/source/toolkit/components/mediasniffer/mp3sniff.c
    as the algorithm for MP3 without ID3 sniffing mentioned in standards is originally
    based on mp3sniff.c.

    A frame header must be present at offset 0, and a second header must be
    found exactly one frame size later.
    """
    input_size = len(input_bytes)

    if not match_mp3_header(input_bytes, input_size, 0):
        return False

    version, bit_rate, freq, pad = parse_mp3_frame(input_bytes)
    skipped_bytes = mp3_framesize(version, bit_rate, freq, pad)

    # Reject an implausibly small frame or one that leaves no room for the
    # next header.
    if skipped_bytes < 4 or skipped_bytes + 4 >= input_size:
        return False

    return match_mp3_header(input_bytes, input_size, skipped_bytes)


def get_image_mime(input_bytes: bytes) -> Optional[bytes]:
    """Return the MIME type of the first entry of ``IMAGE_PATTERNS`` that
    matches ``input_bytes``, or None if no entry matches."""
    for byte_pattern, pattern_mask, lstrip, mime_type in IMAGE_PATTERNS:
        if is_match_mime_pattern(input_bytes, byte_pattern, pattern_mask, lstrip):
            return mime_type

    return None


def get_audio_video_mime(input_bytes: bytes) -> Optional[bytes]:
    """Return the audio or video MIME type detected in ``input_bytes``.

    ``AUDIO_VIDEO_PATTERNS`` is tried first, then the MP4, WebM and
    MP3-without-ID3 signature checks, in that order. Returns None when
    nothing matches.
    """
    for byte_pattern, pattern_mask, lstrip, mime_type in AUDIO_VIDEO_PATTERNS:
        if is_match_mime_pattern(input_bytes, byte_pattern, pattern_mask, lstrip):
            return mime_type

    if is_mp4_signature(input_bytes):
        return b"video/mp4"

    if is_webm_signature(input_bytes):
        return b"video/webm"

    if is_mp3_non_ID3_signature(input_bytes):
        return b"audio/mpeg"

    return None
def get_font_mime(input_bytes: bytes) -> Optional[bytes]:
    """Look ``input_bytes`` up in ``FONT_PATTERNS``.

    Returns the MIME type of the first matching entry, or None.
    """
    return next(
        (
            entry[3]
            for entry in FONT_PATTERNS
            if is_match_mime_pattern(input_bytes, entry[0], entry[1], entry[2])
        ),
        None,
    )


def get_archive_mime(input_bytes: bytes) -> Optional[bytes]:
    """Look ``input_bytes`` up in ``ARCHIVE_PATTERNS``.

    Returns the MIME type of the first matching entry, or None.
    """
    return next(
        (
            entry[3]
            for entry in ARCHIVE_PATTERNS
            if is_match_mime_pattern(input_bytes, entry[0], entry[1], entry[2])
        ),
        None,
    )


def get_text_mime(input_bytes: bytes) -> Optional[bytes]:
    """Look ``input_bytes`` up in ``TEXT_PATTERNS``.

    Returns the MIME type of the first matching entry, or None.
    """
    return next(
        (
            entry[3]
            for entry in TEXT_PATTERNS
            if is_match_mime_pattern(input_bytes, entry[0], entry[1], entry[2])
        ),
        None,
    )


def get_extra_mime(
    input_bytes: bytes,
    extra_types: Optional[Tuple[Tuple[bytes, bytes, Optional[Set[bytes]], bytes], ...]],
) -> Optional[bytes]:
    """Look ``input_bytes`` up in ``EXTRA_PATTERNS`` and then in ``extra_types``.

    ``extra_types`` holds caller-supplied entries with the same layout as the
    built-in tables: (byte pattern, pattern mask, leading bytes to skip or
    None, MIME type). It is consulted only after every built-in entry failed,
    and may be None or empty.

    Returns the MIME type of the first matching entry, or None.
    """
    for pattern_group in (EXTRA_PATTERNS, extra_types or ()):
        for entry in pattern_group:
            if is_match_mime_pattern(input_bytes, entry[0], entry[1], entry[2]):
                return entry[3]

    return None


# --------------------------------------------------------------------------------
# /xtractmime/mimegroups.py:
# --------------------------------------------------------------------------------
from xtractmime._patterns import (
    ARCHIVE_TYPES,
    FONT_TYPES,
    JAVASCRIPT_TYPES,
)


def is_image_mime_type(mime_type: bytes) -> bool:
    """Return True for any MIME type in the ``image/`` family."""
    image_prefix = b"image/"
    return mime_type.startswith(image_prefix)


def is_audio_video_mime_type(mime_type: bytes) -> bool:
    """Return True for ``audio/*``, ``video/*`` and ``application/ogg``."""
    if mime_type.startswith((b"audio/", b"video/")):
        return True
    return mime_type == b"application/ogg"
def is_font_mime_type(mime_type: bytes) -> bool:
    """Return True for ``font/*`` types and for the types in ``FONT_TYPES``."""
    if mime_type.startswith(b"font/"):
        return True
    return mime_type in FONT_TYPES


def is_zip_mime_type(mime_type: bytes) -> bool:
    """Return True for ``application/zip`` and any type ending in ``+zip``."""
    if mime_type.endswith(b"+zip"):
        return True
    return mime_type == b"application/zip"


def is_archive_mime_type(mime_type: bytes) -> bool:
    """Return True if ``mime_type`` is one of ``ARCHIVE_TYPES``."""
    known_archives = ARCHIVE_TYPES
    return mime_type in known_archives


def is_xml_mime_type(mime_type: bytes) -> bool:
    """Return True for ``text/xml``, ``application/xml`` and ``*+xml`` types."""
    if mime_type.endswith(b"+xml"):
        return True
    return mime_type in (b"text/xml", b"application/xml")


def is_html_mime_type(mime_type: bytes) -> bool:
    """Return True only for exactly ``text/html``."""
    return b"text/html" == mime_type


def is_scriptable_mime_type(mime_type: bytes) -> bool:
    """Return True for XML types, HTML and ``application/pdf``."""
    return (
        is_xml_mime_type(mime_type)
        or is_html_mime_type(mime_type)
        or mime_type == b"application/pdf"
    )


def is_javascript_mime_type(mime_type: bytes) -> bool:
    """Return True if the lowercased ``mime_type`` is in ``JAVASCRIPT_TYPES``."""
    lowered = mime_type.lower()
    return lowered in JAVASCRIPT_TYPES


def is_json_mime_type(mime_type: bytes) -> bool:
    """Return True for ``application/json``, ``text/json`` and ``*+json`` types."""
    if mime_type.endswith(b"+json"):
        return True
    return mime_type in (b"application/json", b"text/json")


# --------------------------------------------------------------------------------