├── tests ├── __init__.py ├── http │ ├── __init__.py │ └── parsers │ │ ├── __init__.py │ │ ├── parse_multipart_test.py │ │ └── parse_object_test.py ├── utils.py └── client │ ├── http_client_test.py │ ├── request_test.py │ └── decoder_test.py ├── .flake8 ├── setup.cfg ├── docs └── rets_1_7_2.pdf ├── rets ├── client │ ├── __init__.py │ ├── utils.py │ ├── record.py │ ├── object_type.py │ ├── client.py │ ├── resource.py │ ├── resource_class.py │ └── decoder.py ├── http │ ├── __init__.py │ ├── parsers │ │ ├── __init__.py │ │ ├── parse_object.py │ │ └── parse.py │ ├── data.py │ └── client.py ├── __init__.py └── errors.py ├── .travis.yml ├── CHANGELOG.md ├── .bumpversion.cfg ├── CONTRIBUTING.md ├── bin ├── _check_remote_matches ├── deploy └── release ├── .gitignore ├── LICENSE.md ├── setup.py └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/http/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/http/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | lint=flake8 3 | test=pytest 4 | -------------------------------------------------------------------------------- /docs/rets_1_7_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendoor-labs/rets/HEAD/docs/rets_1_7_2.pdf -------------------------------------------------------------------------------- /rets/client/__init__.py: -------------------------------------------------------------------------------- 1 | from rets.client.client import RetsClient 2 | 3 | __all__ = [ 4 | 'RetsClient', 5 | ] 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.5" 5 | - "3.6" 6 | 7 | install: python setup.py -q install 8 | 9 | script: python setup.py -q test 10 | -------------------------------------------------------------------------------- /rets/http/__init__.py: -------------------------------------------------------------------------------- 1 | from rets.http.client import RetsHttpClient 2 | from rets.http.data import Metadata, Object, SearchResult, SystemMetadata 3 | 4 | __all__ = [ 5 | 'Metadata', 6 | 'Object', 7 | 'RetsHttpClient', 8 | 'SearchResult', 9 | 'SystemMetadata', 10 | ] 11 | -------------------------------------------------------------------------------- /rets/client/utils.py: -------------------------------------------------------------------------------- 1 | from rets.http.client import RetsHttpClient 2 | 3 | 4 | def get_metadata_data(http_client: RetsHttpClient, type_: str, **kwargs): 5 | metadata_structs = http_client.get_metadata(type_, **kwargs) 6 | if metadata_structs: 7 | return metadata_structs[0].data 8 | return () 9 | 
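A minimal usage sketch for the `get_metadata_data` helper above, mirroring how `RetsClient._fetch_resources` and `Resource._fetch_classes` call it later in this repository; the login URL and credentials are placeholders, not values from the repo:

```python
from rets.http import RetsHttpClient
from rets.client.utils import get_metadata_data

# Hypothetical endpoint and credentials, for illustration only.
client = RetsHttpClient('http://my.rets.server/rets/login', 'username', 'password')
client.login()

# Returns the `data` tuple of the first metadata struct the server sends back,
# or an empty tuple when the server returns no metadata for the requested type.
resources = get_metadata_data(client, 'resource')
classes = get_metadata_data(client, 'class', resource='Property')
```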
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # v0.3.0
2 | * Removes dependencies on beautifulsoup and lxml
3 | * Implements user agent digest authentication
4 | * RetsHttpClient infers the RETS server's base url from the full login url
5 | * parse_multipart ignores parts with 20403 No Object Found errors
6 |
7 | # v0.2.0
8 | * Added support for Python 3.3 or later
9 |
10 | # v0.1.0
11 | * Initial release
12 |
--------------------------------------------------------------------------------
/rets/http/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | from rets.http.parsers.parse import (
2 |     parse_capability_urls,
3 |     parse_metadata,
4 |     parse_search,
5 |     parse_system,
6 | )
7 | from rets.http.parsers.parse_object import parse_object
8 |
9 | __all__ = [
10 |     'parse_capability_urls',
11 |     'parse_metadata',
12 |     'parse_object',
13 |     'parse_search',
14 |     'parse_system',
15 | ]
16 |
--------------------------------------------------------------------------------
/rets/client/record.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence
2 |
3 | from rets.http import Object
4 |
5 |
6 | class Record:
7 |
8 |     def __init__(self, resource_class, data: dict):
9 |         self.resource_class = resource_class
10 |         self.data = data
11 |
12 |     def __repr__(self) -> str:
13 |         return '<Record: %s:%s>' % (
14 |             self.resource_class.resource.name,
15 |             self.resource_class.name,
16 |         )
17 |
--------------------------------------------------------------------------------
/.bumpversion.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | commit = True
3 | tag = True
4 | current_version = 0.4.12
5 | parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+))?
6 | serialize =
7 |     {major}.{minor}.{patch}-{release}
8 |     {major}.{minor}.{patch}
9 |
10 | [bumpversion:file:setup.py]
11 |
12 | [bumpversion:file:rets/__init__.py]
13 |
14 | [bumpversion:part:release]
15 | optional_value = gamma
16 | values =
17 |     gamma
18 |     dev
19 |
20 |
--------------------------------------------------------------------------------
/rets/__init__.py:
--------------------------------------------------------------------------------
1 | from rets.client import RetsClient
2 | from rets.http.client import RetsHttpClient
3 | from rets.http.data import Metadata, Object, SearchResult, SystemMetadata
4 |
5 | __title__ = 'rets'
6 | __version__ = '0.4.12'
7 | __author__ = 'Martin Liu <martin@opendoor.com>'
8 | __license__ = 'MIT License'
9 |
10 | __all__ = [
11 |     'RetsClient',
12 |     'RetsHttpClient',
13 |     'Metadata',
14 |     'Object',
15 |     'SearchResult',
16 |     'SystemMetadata',
17 | ]
18 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to rets
2 |
3 | ## Pull requests are welcome!
4 |
5 | 1. Fork the repo
6 | 2. Create a topic branch
7 | 3. Make logically grouped commits with clear commit messages
8 | 4. Push your commits to your fork
9 | 5. Open a pull request against `opendoor-labs/rets`
10 |
11 | ## Issues
12 |
13 | If you believe there is a bug, please provide the maintainers with enough detail to reproduce it, or a link to an app exhibiting the unexpected behavior. For help, please start with Stack Overflow.
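Before opening a pull request, it also helps to run the same checks CI runs. With the aliases defined in `setup.cfg` (`lint=flake8`, `test=pytest`) and the `.travis.yml` above, something like the following should work, though the exact invocation may vary with your environment:

```
python setup.py -q install
python setup.py lint
python setup.py -q test
```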
14 |
--------------------------------------------------------------------------------
/bin/_check_remote_matches:
--------------------------------------------------------------------------------
1 | #!/bin/bash -eu
2 | # Pulled from: http://stackoverflow.com/a/3278427/303423
3 |
4 | UPSTREAM=${1:-'@{u}'}
5 | LOCAL=$(git rev-parse @)
6 | REMOTE=$(git rev-parse "$UPSTREAM")
7 | BASE=$(git merge-base @ "$UPSTREAM")
8 |
9 | if [ $LOCAL = $REMOTE ]; then
10 |     echo "Up-to-date"
11 |     exit 0
12 | elif [ $LOCAL = $BASE ]; then
13 |     echo "Need to pull"
14 | elif [ $REMOTE = $BASE ]; then
15 |     echo "Need to push"
16 | else
17 |     echo "Diverged"
18 | fi
19 |
20 | exit 1
21 |
--------------------------------------------------------------------------------
/rets/http/data.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | Metadata = namedtuple('Metadata', (
4 |     'type_',
5 |     'resource',
6 |     'class_',
7 |     'data',
8 | ))
9 |
10 | Object = namedtuple('Object', (
11 |     'mime_type',
12 |     'content_id',
13 |     'description',
14 |     'object_id',
15 |     'url',
16 |     'preferred',
17 |     'data',
18 | ))
19 |
20 | SearchResult = namedtuple('SearchResult', (
21 |     'count',
22 |     'max_rows',
23 |     'data',
24 | ))
25 |
26 | SystemMetadata = namedtuple('SystemMetadata', (
27 |     'system_id',
28 |     'system_description',
29 |     'system_date',
30 |     'system_version',
31 |     'time_zone_offset',
32 |     'comments',
33 | ))
34 |
--------------------------------------------------------------------------------
/rets/errors.py:
--------------------------------------------------------------------------------
1 |
2 | class RetsError(RuntimeError):
3 |     pass
4 |
5 |
6 | class RetsClientError(RetsError):
7 |     pass
8 |
9 |
10 | class RetsParseError(RetsClientError):
11 |     pass
12 |
13 |
14 | class RetsResponseError(RetsClientError):
15 |
16 |     def __init__(self, content: str, headers: dict):
17 |         super().__init__('Unexpected response from RETS')
18 |         self.content = content
19 |         self.headers = headers
20 |
21 |
22 | class RetsApiError(RetsClientError):
23 |
24 |     def __init__(self, reply_code: int, reply_text: str, xml: str):
25 |         super().__init__('[%i] %s\n\n%s' % (reply_code, reply_text, xml))
26 |         self.reply_code = reply_code
27 |         self.reply_text = reply_text
28 |         self.xml = xml
29 |
--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
1 | from requests import Response
2 | from requests.structures import CaseInsensitiveDict
3 | from requests.utils import get_encoding_from_headers
4 |
5 |
6 | def make_response(status_code: int = 200,
7 |                   content: bytes = b'',
8 |                   headers: dict = None,
9 |                   reason: str = None,
10 |                   encoding: str = None,
11 |                   ) -> Response:
12 |     response = Response()
13 |     response.status_code = status_code
14 |     response._content = content
15 |     response._content_consumed = True
16 |     response.headers = CaseInsensitiveDict(headers or {})
17 |     response.encoding = encoding or get_encoding_from_headers(headers or {})
18 |     response.reason = reason
19 |     return response
20 |
--------------------------------------------------------------------------------
/bin/deploy:
--------------------------------------------------------------------------------
1 | #!/bin/bash -eu
2 |
3 | show_cmd() { echo "\$ $@" >&2; "$@"; echo; }
4 |
5 | dir="`dirname "$0"`"
6 |
7 | eval "$(docopts -h - : "$@" <<EOF
8 | Usage: deploy <version> [--repository] [<repo>]
9 |
10 | <version>      The version you want to release.
Format should be major.minor.patch 11 | --repository The repository to deploy to. Options are local, pypi 12 | EOF 13 | )" 14 | 15 | 16 | deploy_dir="$dir/deploy-worktree-$version" 17 | 18 | # Remove any stale deploy dir from the same version 19 | rm -rf "$deploy_dir" 20 | 21 | show_cmd git worktree add "$deploy_dir" "v$version" 22 | if [ -z "$repo" ]; then 23 | (show_cmd cd "$deploy_dir" && show_cmd python setup.py bdist_wheel upload) 24 | else 25 | (show_cmd cd "$deploy_dir" && show_cmd python setup.py bdist_wheel upload --repository=$repo) 26 | fi 27 | show_cmd rm -r "$deploy_dir" 28 | show_cmd git worktree prune 29 | echo 30 | echo 'Success' 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | *.eggs 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Sphinx documentation 48 | docs/_build/ 49 | 50 | # PyBuilder 51 | target/ 52 | 53 | # PyCharm 54 | .idea/ 55 | 56 | .DS_Store 57 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Opendoor Labs, Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------
/rets/client/object_type.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Mapping, Sequence, Union
2 |
3 | from rets.http import Object, RetsHttpClient
4 |
5 |
6 | class ObjectType:
7 |
8 |     def __init__(self, resource, metadata: dict, http_client: RetsHttpClient):
9 |         self.resource = resource
10 |         self._http = http_client
11 |         self._metadata = metadata
12 |
13 |     @property
14 |     def name(self) -> str:
15 |         return self._metadata['ObjectType']
16 |
17 |     @property
18 |     def mime_type(self) -> str:
19 |         if 'MIMEType' in self._metadata:
20 |             return self._metadata['MIMEType']
21 |         return self._metadata['MimeType']
22 |
23 |     @property
24 |     def metadata(self) -> dict:
25 |         return dict(self._metadata)
26 |
27 |     def get(self, resource_keys: Union[str, Mapping[str, Any], Sequence[str]],
28 |             **kwargs) -> Sequence[Object]:
29 |         return self._http.get_object(self.resource.name, self.name, resource_keys, **kwargs)
30 |
31 |     def __repr__(self) -> str:
32 |         return '<ObjectType: %s:%s>' % (self.resource.name, self.name)
33 |
--------------------------------------------------------------------------------
/tests/client/http_client_test.py:
--------------------------------------------------------------------------------
1 | from http.cookiejar import Cookie
2 |
3 | from mock import mock
4 | from requests.cookies import RequestsCookieJar
5 | from rets import RetsHttpClient
6 |
7 |
8 | def test_cookie_dict():
9 |     c = RetsHttpClient('login_url', 'username', 'password')
10 |     c._session = mock.MagicMock()
11 |     jar = RequestsCookieJar()
12 |     c1 = Cookie(1, 'name1', 'value1', 80, 80, 'domain', 'domain_specified', 'domain_initial_dot', 'path',
13 |                 'path_specified', True, True, False, 'comment', 'comment_url', 'rest')
14 |     c2 = Cookie(1, 'name2', 'value2', 80, 80, 'domain', 'domain_specified', 'domain_initial_dot', 'path',
15 |                 'path_specified', True, True, False, 'comment', 'comment_url', 'rest')
16 |     c3 = Cookie(1, 'name1', 'value1', 80, 80, 'domain', 'domain_specified3', 'domain_initial_dot3', 'path3',
17 |                 'path_specified3', True, True, False, 'comment', 'comment_url', 'rest')
18 |
19 |     jar.set_cookie(c1)
20 |     jar.set_cookie(c2)
21 |     jar.set_cookie(c3)
22 |     c._session.cookies = jar
23 |
24 |     assert c.cookie_dict == {'name1': 'value1', 'name2': 'value2'}
25 |
--------------------------------------------------------------------------------
/bin/release:
--------------------------------------------------------------------------------
1 | #!/bin/bash -eu
2 |
3 | show_cmd() { echo "\$ $@" >&2; "$@"; echo; }
4 | show_usage() { echo "Usage: $0 <major|minor|patch>"; exit 1; }
5 |
6 | dir="`dirname "$0"`"
7 |
8 | if [ "$#" -lt 1 -o "${1:-}" = '-h' -o "${1:-}" = '--help' ]; then
9 |     show_usage
10 | fi
11 |
12 | part="$1"
13 | if [ "$part" != 'major' -a "$part" != 'minor' -a "$part" != 'patch' ]; then
14 |     show_usage
15 | fi
16 |
17 | current_branch="`git branch | grep \* | cut -d ' ' -f2`"
18 | if [ "$current_branch" != 'master' ]; then
19 |     echo "Must be on master to release"
20 |     exit 1
21 | fi
22 |
23 | show_cmd git remote update
24 | show_cmd "$dir"/_check_remote_matches
25 |
26 | show_cmd git status --porcelain
27 | if [ -n "$(git status --porcelain)" ]; then
28 |     echo "Working directory not clean"
29 |     exit 1
30 | fi
31 |
32 | show_cmd bumpversion --verbose --dry-run "$part" 2>&1 | egrep '\$|(Would (add|commit|tag))'
33 |
34 | echo
35 | read -p "You're about to push a new release, continue?
" -n 1 -r < /dev/tty 36 | echo 37 | if [[ $REPLY =~ ^[Yy]$ ]] 38 | then 39 | echo 40 | show_cmd bumpversion --verbose "$part" 2>&1 | egrep '\$|(Adding changes|Committing|Tagging)' 41 | echo 42 | show_cmd git push 43 | show_cmd git push --tags 44 | echo 45 | fi 46 | -------------------------------------------------------------------------------- /tests/client/request_test.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock, call 2 | 3 | from rets.http.client import ( 4 | RetsHttpClient 5 | ) 6 | 7 | 8 | def test_rets_ua_authorization_false(): 9 | send_auth = False 10 | 11 | client = RetsHttpClient(login_url='test.url', 12 | username='user', 13 | password='pass', 14 | send_rets_ua_authorization=send_auth, 15 | ) 16 | client._session = MagicMock() 17 | client._http_request(url='test.url') 18 | 19 | assert client._send_rets_ua_authorization == send_auth 20 | 21 | assert client._session.post.called 22 | assert 'RETS-UA-Authorization' not in client._session.post.call_args_list[0][1]['headers'] 23 | 24 | 25 | def test_rets_ua_authorization_default(): 26 | # by default, sends 'RETS-UA-Authorization' 27 | client = RetsHttpClient(login_url='test.url', 28 | username='user', 29 | password='pass', 30 | ) 31 | client._session = MagicMock() 32 | client._http_request(url='test.url') 33 | 34 | assert client._session.post.called 35 | assert 'RETS-UA-Authorization' in client._session.post.call_args_list[0][1]['headers'] 36 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from setuptools import setup 4 | 5 | if sys.version_info < (3, 5): 6 | print('rets requires Python 3.5 or later') 7 | sys.exit(1) 8 | 9 | 10 | long_desc = 'Python 3 client for the Real Estate Transaction Standard (RETS) Version 1.7.2' 11 | 12 | install_requires = [ 13 | 'requests>=2.12.3', 14 | 'requests-toolbelt>=0.7.0,!=0.9.0', 15 | 'udatetime==0.0.17', 16 | 'lxml>=4.3.0', 17 | ] 18 | 19 | setup_requires = [ 20 | 'pytest-runner', 21 | ] 22 | 23 | tests_requires = [ 24 | 'flake8', 25 | 'pytest', 26 | ] 27 | 28 | packages = [ 29 | 'rets', 30 | 'rets.client', 31 | 'rets.http', 32 | 'rets.http.parsers', 33 | ] 34 | 35 | setup( 36 | name='rets-python', 37 | version='0.4.12', 38 | description='rets-python', 39 | long_description=long_desc, 40 | author='Martin Liu', 41 | author_email='martin@opendoor.com', 42 | url='https://github.com/opendoor-labs/rets', 43 | classifiers=[ 44 | 'Intended Audience :: Developers', 45 | 'Intended Audience :: Financial and Insurance Industry', 46 | 'Intended Audience :: Information Technology', 47 | 'Intended Audience :: Other Audience', 48 | 'License :: OSI Approved :: MIT License', 49 | 'Natural Language :: English', 50 | 'Operating System :: OS Independent', 51 | 'Programming Language :: Python', 52 | 'Programming Language :: Python :: 3', 53 | 'Programming Language :: Python :: 3.5', 54 | 'Programming Language :: Python :: 3.6', 55 | 'Programming Language :: Python :: 3 :: Only', 56 | 'Programming Language :: Python :: Implementation :: CPython', 57 | 'Programming Language :: Python :: Implementation :: PyPy', 58 | 'Topic :: Internet :: WWW/HTTP', 59 | 'Topic :: Internet :: WWW/HTTP :: Indexing/Search', 60 | ], 61 | license='MIT License', 62 | install_requires=install_requires, 63 | setup_requires=setup_requires, 64 | tests_require=tests_requires, 65 | packages=packages, 66 | ) 67 | 
-------------------------------------------------------------------------------- /rets/client/client.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | 3 | from rets.client.resource import Resource 4 | from rets.client.utils import get_metadata_data 5 | from rets.http import RetsHttpClient 6 | 7 | """ 8 | Example of metadata dict: 9 | 10 | metadata = [{ 11 | 'ResourceID': 'Property', 12 | 'KeyField': 'Matrix_Unique_ID', 13 | '_classes': [ 14 | { 15 | 'ClassName': 'Listing', 16 | 'HasKeyIndex': '1', 17 | '_table': [ 18 | ... column fields 19 | ], 20 | } 21 | ], 22 | '_object_types': [ 23 | 'ObjectType': 'LargePhoto', 24 | 'MIMEType': 'image/jpeg', 25 | ] 26 | }, { 27 | 'ResourceID': 'Agent', 28 | 'KeyField': 'Matrix_Unique_ID', 29 | '_classes': [ 30 | { 31 | 'ClassName': 'Listing', 32 | 'HasKeyIndex': '1', 33 | } 34 | ], 35 | }] 36 | """ 37 | 38 | 39 | class RetsClient: 40 | 41 | def __init__(self, 42 | *args, 43 | http_client: RetsHttpClient = None, 44 | metadata: Sequence[dict] = (), 45 | capability_urls: dict = None, 46 | cookie_dict: dict = None, 47 | **kwargs): 48 | self.http = http_client or RetsHttpClient(*args, 49 | capability_urls=capability_urls, cookie_dict=cookie_dict, 50 | **kwargs) 51 | if not (capability_urls and cookie_dict): 52 | self.http.login() 53 | self._resources = self._resources_from_metadata(metadata) 54 | 55 | @property 56 | def metadata(self) -> Sequence[dict]: 57 | return tuple(resource.metadata for resource in self._resources) 58 | 59 | @property 60 | def resources(self) -> Sequence[Resource]: 61 | if not self._resources: 62 | # TODO(ML) Differentiate between not having the metadata and 63 | # having an empty metadata 64 | self._resources = self._fetch_resources() 65 | return self._resources 66 | 67 | def get_resource(self, name: str) -> Optional[Resource]: 68 | for resource in self.resources: 69 | if resource.name == name: 70 | return resource 71 | raise KeyError('unknown resource %s' % name) 72 | 73 | def _fetch_resources(self) -> Sequence[Resource]: 74 | metadata = get_metadata_data(self.http, 'resource') 75 | return self._resources_from_metadata(metadata) 76 | 77 | def _resources_from_metadata(self, metadata: Sequence[dict]) -> Sequence[Resource]: 78 | return tuple(Resource(m, self.http) for m in metadata) 79 | -------------------------------------------------------------------------------- /rets/client/resource.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | 3 | from rets.client.resource_class import ResourceClass 4 | from rets.client.object_type import ObjectType 5 | from rets.client.utils import get_metadata_data 6 | from rets.http import RetsHttpClient 7 | 8 | 9 | class Resource: 10 | 11 | def __init__(self, metadata: dict, http_client: RetsHttpClient): 12 | self._http = http_client 13 | self._metadata = metadata 14 | self._classes = self._classes_from_metadata(metadata.get('_classes', ())) 15 | self._object_types = self._object_types_from_metadata(metadata.get('_object_types', ())) 16 | 17 | @property 18 | def name(self) -> str: 19 | return self._metadata['ResourceID'] 20 | 21 | @property 22 | def key_field(self) -> str: 23 | return self._metadata['KeyField'] 24 | 25 | @property 26 | def metadata(self) -> dict: 27 | metadata = dict(self._metadata) 28 | if self._classes: 29 | metadata['_classes'] = tuple(resource_class.metadata for resource_class in self._classes) 30 | if self._object_types: 31 
| metadata['_object_types'] = tuple(object_type.metadata for object_type in self._object_types)
32 |         return metadata
33 |
34 |     @property
35 |     def classes(self) -> Sequence[ResourceClass]:
36 |         if not self._classes:
37 |             self._classes = self._fetch_classes()
38 |         return self._classes
39 |
40 |     def get_class(self, name: str) -> Optional[ResourceClass]:
41 |         for resource_class in self.classes:
42 |             if resource_class.name == name:
43 |                 return resource_class
44 |         raise KeyError('unknown class %s' % name)
45 |
46 |     @property
47 |     def object_types(self) -> Sequence[ObjectType]:
48 |         if not self._object_types:
49 |             self._object_types = self._fetch_object_types()
50 |         return self._object_types
51 |
52 |     def get_object_type(self, name: str) -> Optional[ObjectType]:
53 |         for resource_object in self.object_types:
54 |             if resource_object.name == name:
55 |                 return resource_object
56 |         raise KeyError('unknown object type %s' % name)
57 |
58 |     def _fetch_classes(self) -> Sequence[ResourceClass]:
59 |         metadata = get_metadata_data(self._http, 'class', resource=self.name)
60 |         return self._classes_from_metadata(metadata)
61 |
62 |     def _fetch_object_types(self) -> Sequence[ObjectType]:
63 |         metadata = get_metadata_data(self._http, 'object', resource=self.name)
64 |         return self._object_types_from_metadata(metadata)
65 |
66 |     def _classes_from_metadata(self, classes_metadata: Sequence[dict]) -> Sequence[ResourceClass]:
67 |         return tuple(ResourceClass(self, m, self._http) for m in classes_metadata)
68 |
69 |     def _object_types_from_metadata(self, object_types_metadata: Sequence[dict]) -> Sequence[ObjectType]:
70 |         return tuple(ObjectType(self, m, self._http) for m in object_types_metadata)
71 |
72 |     def __repr__(self) -> str:
73 |         return '<Resource: %s>' % self.name
74 |
--------------------------------------------------------------------------------
/rets/client/resource_class.py:
--------------------------------------------------------------------------------
1 | from typing import FrozenSet, Mapping, Sequence, Union
2 |
3 | from rets.client.decoder import RecordDecoder
4 | from rets.client.record import Record
5 | from rets.client.utils import get_metadata_data
6 | from rets.errors import RetsClientError
7 | from rets.http import RetsHttpClient, SearchResult
8 |
9 |
10 | class ResourceClass:
11 |
12 |     def __init__(self, resource, metadata: dict, http_client: RetsHttpClient):
13 |         self.resource = resource
14 |         self._http = http_client
15 |         self._metadata = metadata
16 |         self._table = metadata.get('_table')
17 |         self._fields = None
18 |
19 |     @property
20 |     def name(self) -> str:
21 |         return self._metadata['ClassName']
22 |
23 |     @property
24 |     def has_key_index(self) -> bool:
25 |         return self._metadata.get('HasKeyIndex') == '1'
26 |
27 |     @property
28 |     def metadata(self) -> dict:
29 |         metadata = dict(self._metadata)
30 |         if self._table:
31 |             metadata['_table'] = self._table
32 |         return metadata
33 |
34 |     @property
35 |     def table(self) -> Sequence[dict]:
36 |         if self._table is None:
37 |             self._table = tuple(get_metadata_data(self._http, 'table', resource=self.resource.name, class_=self.name))
38 |         return self._table
39 |
40 |     @property
41 |     def fields(self) -> FrozenSet[str]:
42 |         if self._fields is None:
43 |             self._fields = frozenset(field['SystemName'] for field in self.table)
44 |         return self._fields
45 |
46 |     def search(self,
47 |                query: Union[str, Mapping[str, str]],
48 |                fields: Sequence[str] = None,
49 |                parse: bool = True,
50 |                include_tz: bool = False,
51 |                **kwargs) -> SearchResult:
52 |         query = self._validate_query(query)
53 |         if fields:
54 |             fields = self._validate_fields(fields)
55 |
56 |         result = self._http.search(
57 |             resource=self.resource.name,
58 |             class_=self.name,
59 |             query=query,
60 |             select=fields,
61 |             **kwargs,
62 |         )
63 |
64 |         if parse:
65 |             decoder = RecordDecoder(self.table, include_tz)
66 |             rows = decoder.decode(result.data)
67 |         else:
68 |             rows = result.data
69 |
70 |         return SearchResult(
71 |             count=result.count,
72 |             max_rows=result.max_rows,
73 |             data=tuple(Record(self, row) for row in rows) if rows else tuple(),
74 |         )
75 |
76 |     def _validate_query(self, query: Union[str, Mapping[str, str]]) -> str:
77 |         if isinstance(query, str):
78 |             return query
79 |         self._assert_fields(query)
80 |         return ','.join('(%s=%s)' % (field, value) for field, value in query.items())
81 |
82 |     def _validate_fields(self, fields: Sequence[str]) -> str:
83 |         self._assert_fields(fields)
84 |         return ','.join(fields)
85 |
86 |     def _assert_fields(self, fields: Sequence[str]) -> None:
87 |         permissible = self.fields
88 |         invalid = tuple(f for f in fields if f not in permissible)
89 |         if invalid:
90 |             raise RetsClientError('invalid fields %s' % ','.join(invalid))
91 |
92 |     def __repr__(self) -> str:
93 |         return '<ResourceClass: %s:%s>' % (self.resource.name, self.name)
94 |
--------------------------------------------------------------------------------
/rets/client/decoder.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import re
3 | from collections import OrderedDict
4 | from datetime import datetime, time, timezone
5 | from decimal import Decimal
6 | from functools import partial
7 | from typing import Any, Sequence
8 |
9 | import udatetime
10 |
11 | from rets.errors import RetsParseError
12 |
13 | logger = logging.getLogger('rets')
14 |
15 |
16 | class RecordDecoder:
17 |
18 |     def __init__(self, table: Sequence[dict], include_tz: bool = False):
19 |         self._metadata_map = {field['SystemName']: field for field in table}
20 |         self._include_tz = include_tz
21 |
22 |     def decode(self, rows: Sequence[dict]) -> Sequence[dict]:
23 |         if not rows:
24 |             return ()
25 |
26 |         # Build dict of field to decoder functions, assuming that all rows have the same fields.
27 |         decoders = self._build_decoders(tuple(rows[0].keys()))
28 |
29 |         def decode_field(field: str, value: str) -> Any:
30 |             if value == '':
31 |                 return None
32 |             try:
33 |                 return decoders[field](value)
34 |             except Exception as e:
35 |                 raise ValueError(f"Error decoding field {field} with value {value}.
Error: {e}") from e 36 | 37 | return tuple(OrderedDict((field, decode_field(field, value)) for field, value in row.items()) 38 | for row in rows) 39 | 40 | def _build_decoders(self, fields: Sequence[str]) -> dict: 41 | decoders = {} 42 | for field in fields: 43 | try: 44 | field_metadata = self._metadata_map[field] 45 | except KeyError: 46 | logger.warning('field %s not found in table metadata', field) 47 | field_metadata = {'DataType': 'Character'} 48 | 49 | decoders[field] = _get_decoder( 50 | data_type=field_metadata['DataType'], 51 | interpretation=field_metadata.get('Interpretation', ''), 52 | include_tz=self._include_tz, 53 | ) 54 | 55 | return decoders 56 | 57 | 58 | def _get_decoder(data_type: str, interpretation: str, include_tz: bool = False): 59 | if interpretation == _LOOKUP_TYPE: 60 | return str 61 | elif interpretation in _LOOKUP_MULTI_TYPES: 62 | return lambda value: value.split(',') 63 | 64 | if data_type in _TIMEZONE_AWARE_DECODERS: 65 | return partial(_TIMEZONE_AWARE_DECODERS[data_type], include_tz=include_tz) 66 | 67 | try: 68 | return _DECODERS[data_type] 69 | except KeyError: 70 | raise RetsParseError('unknown data type %s' % data_type) from None 71 | 72 | 73 | def _decode_datetime(value: str, include_tz: bool) -> datetime: 74 | # Correct `0000-00-00` to `0000-00-00T00:00:00` 75 | if len(value) == 10: 76 | value = '%sT00:00:00' % value[0:10] 77 | # Correct `0000-00-00 00:00:00` to `0000-00-00T00:00:00` 78 | elif value[10] == ' ': 79 | value = '%sT%s' % (value[0:10], value[11:]) 80 | 81 | decoded = udatetime.from_string(value) 82 | if not include_tz: 83 | return decoded.astimezone(timezone.utc).replace(tzinfo=None) 84 | return decoded 85 | 86 | 87 | def _decode_time(value: str, include_tz: bool) -> time: 88 | decoded = _decode_datetime('1970-01-01T' + value, include_tz) 89 | return decoded.time().replace(tzinfo=decoded.tzinfo) 90 | 91 | 92 | def _decode_date(value: str, include_tz: bool) -> datetime: 93 | try: 94 | decoded = datetime.strptime(value, '%Y-%m-%d') 95 | return decoded 96 | except ValueError: 97 | return _decode_datetime(value, include_tz) 98 | 99 | 100 | _LOOKUP_TYPE = 'Lookup' 101 | 102 | _LOOKUP_MULTI_TYPES = frozenset(('LookupMulti', 'LookupBitstring', 'LookupBitmask')) 103 | 104 | _TIMEZONE_AWARE_DECODERS = { 105 | 'DateTime': _decode_datetime, 106 | 'Time': _decode_time, 107 | 'Date': _decode_date, 108 | } 109 | 110 | _DECODERS = { 111 | 'Boolean': lambda value: value == '1', 112 | 'Character': str, 113 | 'Tiny': int, 114 | 'Small': int, 115 | 'Int': int, 116 | 'Long': int, 117 | 'Decimal': Decimal, 118 | 'Number': int, 119 | # Point is new "Edm.GeographyPoint" from RESO, look online for spec. 
Can store as Postgres Point, see https://bit.ly/2BDPgUS 120 | 'Point': str, 121 | } 122 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI Version](https://badge.fury.io/py/rets-python.svg)](https://pypi.python.org/pypi/rets-python) 2 | [![Code Health](https://landscape.io/github/opendoor-labs/rets/master/landscape.svg?style=flat)](https://landscape.io/github/opendoor-labs/rets/master) 3 | [![Build Status](https://travis-ci.org/opendoor-labs/rets.svg?branch=master)](https://travis-ci.org/opendoor-labs/rets) 4 | [![Python Version](https://img.shields.io/pypi/pyversions/rets-python.svg)](https://pypi.python.org/pypi/rets-python) 5 | [![License](https://img.shields.io/pypi/l/rets-python.svg)](https://pypi.python.org/pypi/rets-python) 6 | 7 | # RETS Python 3 Client 8 | 9 | Python 3 client for the Real Estate Transaction Standard (RETS) Version 1.7.2. Supports Python 3.5 or later. 10 | 11 | ``` 12 | pip install rets-python 13 | ``` 14 | 15 | ## Example 16 | 17 | Standard usage 18 | 19 | ```python 20 | >>> from rets.client import RetsClient 21 | 22 | >>> client = RetsClient( 23 | login_url='http://my.rets.server/rets/login', 24 | username='username', 25 | password='password', 26 | # Ensure that you are using the right auth_type for this particular MLS 27 | # auth_type='basic', 28 | # Alternatively authenticate using user agent password 29 | # user_agent='rets-python/0.3', 30 | # user_agent_password='' 31 | ) 32 | 33 | >>> resource = client.get_resource('Property') 34 | 35 | >>> resource.key_field 36 | 'LIST_1' 37 | 38 | >>> resource_class = resource.get_class('A') 39 | 40 | >>> resource_class.has_key_index 41 | True 42 | 43 | >>> photo_object_type = resource.get_object_type('HiRes') 44 | 45 | >>> photo_object_type.mime_type 46 | 'image/jpeg' 47 | ``` 48 | 49 | You can retrieve listings by performing a search query on the ResourceClass object. The results 50 | will include associated search metadata. 51 | 52 | ```python 53 | >>> search_result = resource_class.search(query='(LIST_87=2017-01-01+)', limit=10) 54 | 55 | >>> search_result.count 56 | 11941 57 | 58 | >>> search_result.max_rows 59 | False 60 | 61 | >>> len(search_result.data) 62 | 10 63 | ``` 64 | 65 | The values returned by the search query will be automatically decoded into Python builtin types. 66 | 67 | ```python 68 | >>> listing = search_result.data[0] 69 | 70 | >>> listing.data 71 | { 72 | 'internal_listing_id': '20170104191513476022000000', 73 | 'mls_number': '5650160', 74 | 'mod_timestamp': datetime(2017, 8, 2, 12, 5, 17), 75 | 'list_date': datetime(2017, 8, 2), 76 | 'list_price': 250000, 77 | ... 78 | } 79 | 80 | >>> listing.data[listing.resource_class.resource.key_field] 81 | '20170104191513476022000000' 82 | ``` 83 | 84 | Photos can also be retrieved in bulk from the ObjectType object using the resource keys of the records. 
85 |
86 | ```python
87 | >>> all_photos = photo_object_type.get(
88 |         resource_keys=[listing.data[listing.resource_class.resource.key_field] for listing in listings],
89 |         location=True,
90 |     )
91 |
92 | >>> len(all_photos)
93 | 232
94 |
95 | >>> all_photos[0]
96 | Object(mime_type='image/jpeg', content_id='20071218141725529770000000', description='Primary Photo', object_id='1', url='...', preferred=True, data=None)
97 | ```
98 |
99 | Low level RETS HTTP client usage:
100 |
101 | ```python
102 | from rets.http import RetsHttpClient
103 |
104 | client = RetsHttpClient(
105 |     login_url='http://my.rets.server/rets/login',
106 |     username='username',
107 |     password='password',
108 |     # Alternatively authenticate using user agent password
109 |     # user_agent='rets-python/0.3',
110 |     # user_agent_password=''
111 | )
112 |
113 | # Authenticate and fetch available transactions
114 | client.login()
115 |
116 | # See available Resources
117 | client.get_metadata('resource')
118 |
119 | # See available Classes for the Property resource
120 | client.get_metadata('class', resource='Property')
121 |
122 | # See the Table definition for Class A
123 | client.get_metadata('table', resource='Property', class_='A')
124 |
125 | # Get a sample of recent listings
126 | search_result = client.search(
127 |     resource='Property',
128 |     class_='A',
129 |     query='(LIST_87=2017-01-01+)',
130 |     select='LIST_87,LIST_105,LIST_1',
131 |     limit=10,
132 |     count=1,
133 | )
134 |
135 | # Get the KeyField values of the listings
136 | resource_keys = [r['LIST_1'] for r in search_result.data]
137 |
138 | # Fetch the photo URLs for those recent listings
139 | objects = client.get_object(
140 |     resource='Property',
141 |     object_type='HiRes',
142 |     resource_keys=resource_keys,
143 |     location=True,
144 | )
145 | ```
146 | # Developing/Releasing
147 | To release a new version, use `bin/release <major|minor|patch>`
148 |
149 | This package is deployed to: https://pypi.org/manage/project/rets-python/releases/
150 |
151 | To deploy, you can try `bin/deploy`, but it may give you an SSL error. Alternatively, see https://packaging.python.org/tutorials/packaging-projects/ or, in summary:
152 |
153 |
154 | Update the version number in the following files (e.g. 0.4.10 -> 0.4.11)
155 | * setup.py
156 | * build/lib/rets/__init__.py
157 | * rets/__init__.py
158 |
159 |
160 | ```
161 | python3 -m pip install --user --upgrade setuptools wheel
162 | python3 setup.py sdist bdist_wheel
163 | python3 -m pip install --user --upgrade twine
164 | python3 -m twine upload dist/*
165 | ```
166 |
--------------------------------------------------------------------------------
/rets/http/parsers/parse_object.py:
--------------------------------------------------------------------------------
1 | import mimetypes
2 | from typing import Optional, Sequence
3 | import cgi
4 |
5 | from requests import Response
6 | from requests.structures import CaseInsensitiveDict
7 | from requests_toolbelt.multipart.decoder import MultipartDecoder
8 |
9 | from rets.errors import RetsApiError, RetsResponseError
10 | from rets.http.data import Object
11 | from rets.http.parsers.parse import DEFAULT_ENCODING, ResponseLike, parse_xml
12 |
13 |
14 | def parse_object(response: Response) -> Sequence[Object]:
15 |     """
16 |     Parse the response from a GetObject transaction. If there are multiple
17 |     objects to be returned then the response should be a multipart response.
18 | The headers of the response (or each part in the multipart response) 19 | contains the metadata for the object, including the location if requested. 20 | The body of the response should contain the binary content of the object, 21 | an XML document specifying a transaction status code, or left empty. 22 | """ 23 | content_type = response.headers.get('content-type') 24 | 25 | if content_type and 'multipart/parallel' in content_type: 26 | return _parse_multipart(response) 27 | 28 | object_ = _parse_body_part(response) 29 | return (object_,) if object_ is not None else () 30 | 31 | 32 | def _parse_multipart(response: ResponseLike) -> Sequence[Object]: 33 | """ 34 | RFC 2045 describes the format of an Internet message body containing a MIME message. The 35 | body contains one or more body parts, each preceded by a boundary delimiter line, and the 36 | last one followed by a closing boundary delimiter line. After its boundary delimiter line, 37 | each body part then consists of a header area, a blank line, and a body area. 38 | 39 | HTTP/1.1 200 OK 40 | Server: Apache/2.0.13 41 | Date: Fri, 22 OCT 2004 12:03:38 GMT 42 | Cache-Control: private 43 | RETS-Version: RETS/1.7.2 44 | MIME-Version: 1.0 45 | Content-Type: multipart/parallel; boundary="simple boundary" 46 | 47 | --simple boundary 48 | Content-Type: image/jpeg 49 | Content-ID: 123456 50 | Object-ID: 1 51 | 52 | 53 | 54 | --simple boundary 55 | Content-Type: text/xml 56 | Content-ID: 123457 57 | Object-ID: 1 58 | 59 | 60 | 61 | --simple boundary-- 62 | """ 63 | encoding = response.encoding or DEFAULT_ENCODING 64 | multipart = MultipartDecoder.from_response(response, encoding) 65 | # We need to decode the headers because MultipartDecoder returns bytes keys and values, 66 | # while requests.Response.headers uses str keys and values. 67 | for part in multipart.parts: 68 | part.headers = _decode_headers(part.headers, encoding) 69 | 70 | objects = (_parse_body_part(part) for part in multipart.parts) 71 | return tuple(object_ for object_ in objects if object_ is not None) 72 | 73 | 74 | def _parse_body_part(part: ResponseLike) -> Optional[Object]: 75 | headers = part.headers 76 | 77 | content_id = headers.get('content-id') 78 | object_id = headers.get('object-id') 79 | preferred = 'preferred' in headers 80 | description = headers.get('content-description') 81 | location = headers.get('location') 82 | content_type = headers.get('content-type') 83 | mime_type = _parse_mime_type(content_type) if content_type else None 84 | 85 | # Check XML responses first, it may contain an error description. 86 | if mime_type == 'text/xml': 87 | try: 88 | parse_xml(part) 89 | except RetsApiError as e: 90 | if e.reply_code == 20403: # No object found 91 | return None 92 | elif e.reply_code == 20407: # Access to object not allowed 93 | return None 94 | raise 95 | 96 | # All RETS responses _must_ have `Content-ID` and `Object-ID` headers. 97 | if not content_id or not object_id: 98 | raise RetsResponseError(part.content, part.headers) 99 | 100 | # Respond with `Location` header redirect. 101 | if location: 102 | return Object( 103 | mime_type=_guess_mime_type(location) or mime_type, 104 | content_id=content_id, 105 | description=description, 106 | object_id=object_id, 107 | url=location, 108 | preferred=preferred, 109 | data=None, 110 | ) 111 | 112 | # Check the `Content-Type` header exists for object responses. 
113 | if mime_type is None or mime_type == 'text/html': 114 | raise RetsResponseError(part.content, part.headers) 115 | 116 | return Object( 117 | mime_type=mime_type, 118 | content_id=content_id, 119 | description=description, 120 | object_id=object_id, 121 | url=None, 122 | preferred=preferred, 123 | data=part.content or None, 124 | ) 125 | 126 | 127 | def _guess_mime_type(location: str) -> Optional[str]: 128 | mime_type, _ = mimetypes.guess_type(location) 129 | return mime_type 130 | 131 | 132 | def _parse_mime_type(content_type: str) -> Optional[str]: 133 | # Parse mime type from content-type header, e.g. 'image/jpeg;charset=US-ASCII' -> 'image/jpeg' 134 | mime_type, _ = cgi.parse_header(content_type) 135 | return mime_type or None 136 | 137 | 138 | def _decode_headers(headers: CaseInsensitiveDict, encoding: str) -> CaseInsensitiveDict: 139 | return CaseInsensitiveDict({ 140 | k.decode(encoding): v.decode(encoding) 141 | for k, v in headers.items() 142 | }) 143 | -------------------------------------------------------------------------------- /tests/client/decoder_test.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, time, timedelta, timezone 2 | from decimal import Decimal 3 | 4 | import pytest 5 | 6 | from rets.client.decoder import ( 7 | RecordDecoder, 8 | _get_decoder, 9 | _decode_datetime, 10 | _decode_time, 11 | _decode_date, 12 | ) 13 | 14 | 15 | @pytest.fixture 16 | def decoder(): 17 | return RecordDecoder(({ 18 | 'SystemName': 'mls_number', 19 | 'DataType': 'Character', 20 | }, { 21 | 'SystemName': 'mod_timestamp', 22 | 'DataType': 'DateTime', 23 | }, { 24 | 'SystemName': 'list_date', 25 | 'DataType': 'Date', 26 | }, { 27 | 'SystemName': 'list_price', 28 | 'DataType': 'Int', 29 | })) 30 | 31 | 32 | def test_decode_rows(decoder): 33 | rows = decoder.decode(({ 34 | 'mls_number': '1', 35 | 'mod_timestamp': '2017-08-01T12:00:00', 36 | 'list_date': '2017-08-01', 37 | 'list_price': '150000', 38 | }, { 39 | 'mls_number': '2', 40 | 'mod_timestamp': '2017-08-02T12:00:00', 41 | 'list_date': '2017-08-02', 42 | 'list_price': '250000', 43 | })) 44 | 45 | assert rows == ({ 46 | 'mls_number': '1', 47 | 'mod_timestamp': datetime(2017, 8, 1, 12), 48 | 'list_date': datetime(2017, 8, 1), 49 | 'list_price': 150000, 50 | }, { 51 | 'mls_number': '2', 52 | 'mod_timestamp': datetime(2017, 8, 2, 12), 53 | 'list_date': datetime(2017, 8, 2), 54 | 'list_price': 250000, 55 | }) 56 | 57 | 58 | def test_decode_rows_missing_field(decoder): 59 | rows = decoder.decode(({ 60 | 'new_field': 'value', 61 | },)) 62 | 63 | assert rows == ({'new_field': 'value'},) 64 | 65 | 66 | def test_get_decoder(): 67 | parser = _get_decoder('Character', '') 68 | assert parser('test') == 'test' 69 | 70 | parser = _get_decoder('Boolean', '') 71 | assert parser('1') == True 72 | assert parser('0') == False 73 | 74 | parser = _get_decoder('Date', '') 75 | assert parser('2017-01-02') == datetime(2017, 1, 2) 76 | 77 | parser = _get_decoder('Tiny', '') 78 | assert parser('1') == 1 79 | 80 | parser = _get_decoder('Small', '') 81 | assert parser('1') == 1 82 | 83 | parser = _get_decoder('Int', '') 84 | assert parser('1') == 1 85 | 86 | parser = _get_decoder('Long', '') 87 | assert parser('1') == 1 88 | 89 | parser = _get_decoder('Decimal', '') 90 | assert parser('1.2345') == Decimal('1.2345') 91 | 92 | parser = _get_decoder('Character', 'Lookup') 93 | assert parser('test') == 'test' 94 | 95 | parser = _get_decoder('Character', 'LookupMulti') 96 | assert 
parser('a,b,c') == ['a', 'b', 'c'] 97 | 98 | parser = _get_decoder('Number', '') 99 | assert parser('214') == 214 100 | 101 | 102 | def test_decode_datetime(): 103 | assert _decode_datetime('2017-01-02T03:04:05', True) == \ 104 | datetime(2017, 1, 2, 3, 4, 5, tzinfo=timezone(timedelta(0))) 105 | # TODO: The standard specifies that the second fraction is limited to one 106 | # digit, however udatetime only permits 3 or 6 digits. 107 | assert _decode_datetime('2017-01-02T03:04:05.600', True) == \ 108 | datetime(2017, 1, 2, 3, 4, 5, 600000, tzinfo=timezone(timedelta(0))) 109 | assert _decode_datetime('2020-10-12 10:46:54.146488', True) == \ 110 | datetime(2020, 10, 12, 10, 46, 54, 146488, tzinfo=timezone(timedelta(0))) 111 | assert _decode_datetime('2017-01-02T03:04:05Z', True) == \ 112 | datetime(2017, 1, 2, 3, 4, 5, tzinfo=timezone(timedelta(0))) 113 | assert _decode_datetime('2017-01-02T03:04:05+00:00', True) == \ 114 | datetime(2017, 1, 2, 3, 4, 5, tzinfo=timezone(timedelta(0))) 115 | assert _decode_datetime('2017-01-02T03:04:05-00:00', True) == \ 116 | datetime(2017, 1, 2, 3, 4, 5, tzinfo=timezone(timedelta(0))) 117 | assert _decode_datetime('2017-01-02T03:04:05+07:08', True) == \ 118 | datetime(2017, 1, 2, 3, 4, 5, tzinfo=timezone(timedelta(hours=7, minutes=8))) 119 | assert _decode_datetime('2017-01-02T03:04:05.600+07:08', True) == \ 120 | datetime(2017, 1, 2, 3, 4, 5, 600000, tzinfo=timezone(timedelta(hours=7, minutes=8))) 121 | assert _decode_datetime('2017-01-02T03:04:05-07:08', True) == \ 122 | datetime(2017, 1, 2, 3, 4, 5, tzinfo=timezone(timedelta(hours=-7, minutes=-8))) 123 | assert _decode_datetime('2017-01-02T03:04:05', False) == \ 124 | datetime(2017, 1, 2, 3, 4, 5) 125 | assert _decode_datetime('2017-01-02T03:04:05.600', False) == \ 126 | datetime(2017, 1, 2, 3, 4, 5, 600000) 127 | assert _decode_datetime('2017-01-02 03:04:05.600', False) == \ 128 | datetime(2017, 1, 2, 3, 4, 5, 600000) 129 | assert _decode_datetime('2017-01-02T03:04:05Z', False) == datetime(2017, 1, 2, 3, 4, 5) 130 | assert _decode_datetime('2017-01-02T03:04:05+00:00', False) == datetime(2017, 1, 2, 3, 4, 5) 131 | assert _decode_datetime('2017-01-02T03:04:05-00:00', False) == datetime(2017, 1, 2, 3, 4, 5) 132 | assert _decode_datetime('2017-01-02T12:00:00+07:08', False) == datetime(2017, 1, 2, 4, 52) 133 | assert _decode_datetime('2017-01-02T12:00:00-07:08', False) == datetime(2017, 1, 2, 19, 8) 134 | assert _decode_datetime('2017-01-01 00:00:00', False) == datetime(2017, 1, 1, 0, 0) 135 | assert _decode_datetime('2017-01-01', False) == datetime(2017, 1, 1, 0, 0) 136 | 137 | 138 | def test_decode_time(): 139 | assert _decode_time('03:04:05', True) == time(3, 4, 5, tzinfo=timezone(timedelta(0))) 140 | # TODO: The standard specifies that the second fraction is limited to one 141 | # digit, however udatetime only permits 3 or 6 digits. 
142 | assert _decode_time('03:04:05.600', True) == time(3, 4, 5, 600000, tzinfo=timezone(timedelta(0))) 143 | assert _decode_time('03:04:05Z', True) == time(3, 4, 5, tzinfo=timezone(timedelta(0))) 144 | assert _decode_time('03:04:05+00:00', True) == time(3, 4, 5, tzinfo=timezone(timedelta(0))) 145 | assert _decode_time('03:04:05-00:00', True) == time(3, 4, 5, tzinfo=timezone(timedelta(0))) 146 | assert _decode_time('03:04:05+07:08', True) == time(3, 4, 5, tzinfo=timezone(timedelta(hours=7, minutes=8))) 147 | assert _decode_time('03:04:05-07:08', True) == time(3, 4, 5, tzinfo=timezone(timedelta(hours=-7, minutes=-8))) 148 | assert _decode_time('03:04:05.600+07:08', True) == \ 149 | time(3, 4, 5, 600000, tzinfo=timezone(timedelta(hours=7, minutes=8))) 150 | assert _decode_time('03:04:05', False) == time(3, 4, 5) 151 | assert _decode_time('03:04:05.600', False) == time(3, 4, 5, 600000) 152 | assert _decode_time('03:04:05Z', False) == time(3, 4, 5) 153 | assert _decode_time('03:04:05+00:00', False) == time(3, 4, 5) 154 | assert _decode_time('03:04:05-00:00', False) == time(3, 4, 5) 155 | assert _decode_time('12:00:00+07:08', False) == time(4, 52) 156 | assert _decode_time('12:00:00-07:08', False) == time(19, 8) 157 | 158 | 159 | def test_decode_date(): 160 | assert _decode_date('2017-01-02T00:00:00.000', False) == datetime(2017, 1, 2, 0, 0, 0) 161 | assert _decode_date('2017-01-02', False) == datetime(2017, 1, 2, 0, 0, 0) 162 | -------------------------------------------------------------------------------- /tests/http/parsers/parse_multipart_test.py: -------------------------------------------------------------------------------- 1 | from rets import Object 2 | from rets.http.parsers import parse_object 3 | from tests.utils import make_response 4 | 5 | 6 | def test_parse_object_single_location_true(): 7 | headers = { 8 | 'Content-Type': 'image/jpeg;charset=US-ASCII', 9 | 'Content-ID': '20170817170218718581000000', 10 | 'Object-ID': '1', 11 | 'Location': 'http://cdn.rets.com/1.jpg', 12 | 'Content-Description': 'anthem', 13 | 'Preferred': '1', 14 | } 15 | body = b'' 16 | response = make_response(200, body, headers) 17 | 18 | assert parse_object(response) == ( 19 | Object( 20 | mime_type='image/jpeg', 21 | content_id='20170817170218718581000000', 22 | description='anthem', 23 | object_id='1', 24 | url='http://cdn.rets.com/1.jpg', 25 | preferred=True, 26 | data=None, 27 | ), 28 | ) 29 | 30 | 31 | def test_parse_object_single_location_false(): 32 | headers = { 33 | 'Content-Type': 'image/jpeg;charset=US-ASCII', 34 | 'Content-ID': '20170817170218718581000000', 35 | 'Object-ID': '1', 36 | 'Content-Description': 'anthem', 37 | 'Preferred': '1', 38 | } 39 | body = b'binary content' 40 | response = make_response(200, body, headers) 41 | 42 | assert parse_object(response) == ( 43 | Object( 44 | mime_type='image/jpeg', 45 | content_id='20170817170218718581000000', 46 | description='anthem', 47 | object_id='1', 48 | url=None, 49 | preferred=True, 50 | data=b'binary content', 51 | ), 52 | ) 53 | 54 | 55 | def test_parse_object_not_found(): 56 | headers = { 57 | 'Content-Type': 'text/xml;charset=US-ASCII', 58 | 'Content-ID': '201708171702187185810000009999', 59 | 'Object-ID': '1', 60 | 'Location': '', 61 | } 62 | body = b'' 63 | response = make_response(200, body, headers) 64 | 65 | assert parse_object(response) == () 66 | 67 | 68 | def test_parse_object_multi_location_true(): 69 | headers = { 70 | 'Content-Type': 'multipart/parallel;boundary="FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr";' 71 | 
'charset=US-ASCII', 72 | } 73 | body = ( 74 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 75 | b'\r\nContent-Type: image/jpeg' 76 | b'\r\nContent-ID: 20170817170218718581000000' 77 | b'\r\nObject-ID: 1' 78 | b'\r\nLocation: http://cdn.rets.com/1.jpg' 79 | b'\r\nContent-Description: anthem' 80 | b'\r\nPreferred: 1' 81 | b'\r\n\r\n' 82 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 83 | b'\r\nContent-Type: image/jpeg' 84 | b'\r\nContent-ID: 20170817170218718581000000' 85 | b'\r\nObject-ID: 2' 86 | b'\r\nLocation: http://cdn.rets.com/2.jpg' 87 | b'\r\nContent-Description: anthem2' 88 | b'\r\n\r\n' 89 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr--' 90 | b'\r\n' 91 | ) 92 | response = make_response(200, body, headers) 93 | 94 | assert parse_object(response) == ( 95 | Object( 96 | mime_type='image/jpeg', 97 | content_id='20170817170218718581000000', 98 | description='anthem', 99 | object_id='1', 100 | url='http://cdn.rets.com/1.jpg', 101 | preferred=True, 102 | data=None, 103 | ), 104 | Object( 105 | mime_type='image/jpeg', 106 | content_id='20170817170218718581000000', 107 | description='anthem2', 108 | object_id='2', 109 | url='http://cdn.rets.com/2.jpg', 110 | preferred=False, 111 | data=None, 112 | ), 113 | ) 114 | 115 | 116 | def test_parse_object_multi_location_false(): 117 | headers = { 118 | 'Content-Type': 'multipart/parallel;boundary="FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr";' 119 | 'charset=US-ASCII', 120 | } 121 | body = ( 122 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 123 | b'\r\nContent-Type: image/jpeg' 124 | b'\r\nContent-ID: 20170817170218718581000000' 125 | b'\r\nObject-ID: 1' 126 | b'\r\nContent-Description: anthem' 127 | b'\r\nPreferred: 1' 128 | b'\r\n' 129 | b'\r\nbinary content 1' 130 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 131 | b'\r\nContent-Type: image/jpeg' 132 | b'\r\nContent-ID: 20170817170218718581000000' 133 | b'\r\nObject-ID: 2' 134 | b'\r\nContent-Description: anthem2' 135 | b'\r\n' 136 | b'\r\nbinary content 2' 137 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr--' 138 | b'\r\n' 139 | ) 140 | response = make_response(200, body, headers) 141 | 142 | assert parse_object(response) == ( 143 | Object( 144 | mime_type='image/jpeg', 145 | content_id='20170817170218718581000000', 146 | description='anthem', 147 | object_id='1', 148 | url=None, 149 | preferred=True, 150 | data=b'binary content 1', 151 | ), 152 | Object( 153 | mime_type='image/jpeg', 154 | content_id='20170817170218718581000000', 155 | description='anthem2', 156 | object_id='2', 157 | url=None, 158 | preferred=False, 159 | data=b'binary content 2', 160 | ), 161 | ) 162 | 163 | 164 | def test_parse_object_no_encoding(): 165 | # Note: there is no charset in the content-type 166 | headers = { 167 | 'Content-Type': 'multipart/parallel;boundary="FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr"' 168 | } 169 | body = ( 170 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 171 | b'\r\nContent-Type: image/jpeg' 172 | b'\r\nContent-ID: 20170817170218718581000000' 173 | b'\r\nObject-ID: 1' 174 | b'\r\nLocation: http://cdn.rets.com/1.jpg' 175 | b'\r\nContent-Description: anthem' 176 | b'\r\nPreferred: 1' 177 | b'\r\n\r\n' 178 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr--' 179 | b'\r\n' 180 | ) 181 | response = make_response(200, body, headers) 182 | 183 | assert parse_object(response) == ( 184 | Object( 185 | mime_type='image/jpeg', 186 
| content_id='20170817170218718581000000', 187 | description='anthem', 188 | object_id='1', 189 | url='http://cdn.rets.com/1.jpg', 190 | preferred=True, 191 | data=None, 192 | ), 193 | ) 194 | 195 | 196 | def test_parse_object_location_true_content_type_xml(): 197 | headers = { 198 | 'Content-Type': 'multipart/parallel; boundary=2ce97979.83bf.368b.86c2.cc9295f41e3d', 199 | } 200 | body = ( 201 | b'\r\n--2ce97979.83bf.368b.86c2.cc9295f41e3d' 202 | b'\r\nContent-ID: 8240151' 203 | b'\r\nObject-ID: 1' 204 | b'\r\nLocation: http://cdn.rets.com/1.jpg' 205 | b'\r\nContent-Description: Welcome Home!' 206 | b'\r\nContent-Type: text/xml' 207 | b'\r\n' 208 | b'\r\n\r\n\r\n' 209 | b'\r\n--2ce97979.83bf.368b.86c2.cc9295f41e3d--' 210 | b'\r\n' 211 | ) 212 | response = make_response(200, body, headers) 213 | 214 | assert parse_object(response) == ( 215 | Object( 216 | mime_type='image/jpeg', 217 | content_id='8240151', 218 | description='Welcome Home!', 219 | object_id='1', 220 | url='http://cdn.rets.com/1.jpg', 221 | preferred=False, 222 | data=None, 223 | ), 224 | ) 225 | -------------------------------------------------------------------------------- /rets/http/parsers/parse.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from itertools import zip_longest 3 | from typing import Iterable, Sequence, Tuple, Union 4 | from lxml import etree 5 | 6 | from requests import Response 7 | from requests_toolbelt.multipart.decoder import BodyPart 8 | 9 | from rets.errors import RetsParseError, RetsApiError, RetsResponseError 10 | from rets.http.data import Metadata, SearchResult, SystemMetadata 11 | 12 | DEFAULT_ENCODING = 'utf-8' 13 | 14 | ResponseLike = Union[Response, BodyPart] 15 | 16 | 17 | def parse_xml(response: ResponseLike) -> etree.Element: 18 | encoding = response.encoding or DEFAULT_ENCODING 19 | try: 20 | root = etree.fromstring(response.content.decode(encoding), parser=etree.XMLParser(recover=True)) 21 | except ValueError as e: 22 | if str(e) == "Unicode strings with encoding declaration are not supported. Please use bytes input or XML fragments without declaration.": 23 | # parse bytes directly, rather than from string 24 | root = etree.XML(response.content) 25 | else: 26 | raise e 27 | 28 | if root is None: 29 | raise RetsResponseError(response.content, response.headers) 30 | 31 | reply_code, reply_text = _parse_rets_status(root) 32 | if reply_code and reply_text != "Operation Successful": 33 | raise RetsApiError(reply_code, reply_text, response.content) 34 | 35 | return root 36 | 37 | 38 | def parse_capability_urls(response: Response) -> dict: 39 | """ 40 | Parses the list of capability URLs from the response of a successful Login transaction. 41 | 42 | The capability url list is the set of functions or URLs to which the Login grants access. 43 | A capability consists of a key and a URL. The list returned from the server in the login 44 | reply must include URLs for Search, Login, and GetMetadata, and optionally may include 45 | URLs for Action, ChangePassword, GetObject, LoginComplete, Logout, ServerInformation, 46 | and Update. 
47 | 48 | <RETS ReplyCode="0" ReplyText="Operation Successful"> 49 | <RETS-RESPONSE> 50 | MemberName=member_name 51 | User=user_id,user_level,user_class,agent_code 52 | Broker=RETSOFFIC 53 | MetadataVersion=01.09.02991 54 | MetadataTimestamp=2016-11-24T05:24:06Z 55 | MinMetadataTimestamp=2016-11-24T05:24:06Z 56 | Login=/rets2_1/Login 57 | Search=/rets2_1/Search 58 | GetMetadata=/rets2_1/GetMetadata 59 | GetObject=/rets2_1/GetObject 60 | Logout=/rets2_1/Logout 61 | </RETS-RESPONSE> 62 | </RETS> 63 | """ 64 | elem = parse_xml(response) 65 | response_elem = elem.find('RETS-RESPONSE') 66 | if response_elem is None: 67 | return {} 68 | raw_arguments = response_elem.text.strip().split('\n') 69 | return dict((s.strip() for s in arg.split('=', 1)) for arg in raw_arguments) 70 | 71 | 72 | def parse_metadata(response: Response) -> Sequence[Metadata]: 73 | """ 74 | Parse the information from a GetMetadata transaction. 75 | 76 | <RETS ReplyCode="0" ReplyText="Operation Successful"> 77 | <METADATA-RESOURCE Version="..." Date="..."> 78 | <COLUMNS> ResourceID StandardName </COLUMNS> 79 | <DATA> ActiveAgent ActiveAgent </DATA> 80 | <DATA> Office Office </DATA> 81 | <DATA> OpenHouse OpenHouse </DATA> 82 | <DATA> Property Property </DATA> 83 | <DATA> RentalSchedule RentalSchedule </DATA> 84 | </METADATA-RESOURCE> 85 | </RETS> 86 | """ 87 | elem = parse_xml(response) 88 | metadata_elems = [e for e in elem.findall('*') if e.tag.startswith('METADATA-')] 89 | if not metadata_elems: 90 | return () 91 | 92 | def parse_metadata_elem(elem: etree.Element) -> Metadata: 93 | """ Parses a single METADATA-* element """ 94 | return Metadata( 95 | type_=elem.tag.split('-', 1)[1], 96 | resource=elem.get('Resource'), 97 | class_=elem.get('Class'), 98 | data=tuple(_parse_data(elem)), 99 | ) 100 | 101 | return tuple(parse_metadata_elem(metadata_elem) for metadata_elem in metadata_elems) 102 | 103 | 104 | def parse_system(response: Response) -> SystemMetadata: 105 | """ 106 | Parse the server system information from a SYSTEM GetMetadata transaction. 107 | 108 | <RETS ReplyCode="0" ReplyText="Operation Successful"> 109 | <METADATA-SYSTEM Version="..." Date="..."> 110 | <SYSTEM SystemID="..." SystemDescription="..."/> 111 | <COMMENTS/> 112 | </METADATA-SYSTEM> 113 | </RETS> 114 | """ 115 | elem = parse_xml(response) 116 | metadata_system_elem = _find_or_raise(elem, 'METADATA-SYSTEM') 117 | system_elem = _find_or_raise(metadata_system_elem, 'SYSTEM') 118 | comments_elem = metadata_system_elem.find('COMMENTS') 119 | return SystemMetadata( 120 | system_id=system_elem.get('SystemID'), 121 | system_description=system_elem.get('SystemDescription'), 122 | system_date=metadata_system_elem.get('Date'), 123 | system_version=metadata_system_elem.get('Version'), 124 | 125 | # Optional fields 126 | time_zone_offset=system_elem.get('TimeZoneOffset'), 127 | comments=(comments_elem.text or None) if comments_elem is not None else None, 128 | ) 129 | 130 | 131 | def parse_search(response: Response) -> SearchResult: 132 | try: 133 | elem = parse_xml(response) 134 | except RetsApiError as e: 135 | if e.reply_code == 20201: # No records found 136 | return SearchResult(0, False, ()) 137 | raise 138 | 139 | count_elem = elem.find('COUNT') 140 | if count_elem is not None: 141 | count = int(count_elem.get('Records')) 142 | else: 143 | count = None 144 | 145 | try: 146 | data = tuple(_parse_data(elem)) 147 | except RetsParseError: 148 | data = None 149 | 150 | return SearchResult( 151 | count=count, 152 | # etree Element objects with no children evaluate as false-y, so test against None explicitly 153 | max_rows=elem.find('MAXROWS') is not None, 154 | data=data, 155 | ) 156 | 157 | 158 | def _parse_rets_status(root: etree.Element) -> Tuple[int, str]: 159 | """ 160 | If RETS-STATUS exists, the client must use this instead 161 | of the status from the body-start-line 162 | """ 163 | rets_status = root.find('RETS-STATUS') 164 | elem = rets_status if rets_status is not None else root 165 | return int(elem.get('ReplyCode')), elem.get('ReplyText') 166 | 167 | 168 | def _parse_data(elem: 
etree.Element) -> Iterable[dict]: 169 | """ 170 | Parses a generic container element enclosing a single COLUMNS and multiple DATA elems, and 171 | returns a generator of dicts with keys given by the COLUMNS elem and values given by each 172 | DATA elem. The container elem may optionally contain a DELIMITER elem to define the delimiter 173 | used, otherwise a default of '\t' is assumed. 174 | 175 | <RETS ReplyCode="0" ReplyText="Operation Successful"> 176 | <DELIMITER value="09"/> 177 | <COLUMNS> LIST_87 LIST_105 LIST_1 </COLUMNS> 178 | <DATA> 2016-12-01T00:08:10 5489015 20160824051756837742000000 </DATA> 179 | <DATA> 2016-12-01T00:10:02 5497756 20160915055426038684000000 </DATA> 180 | <DATA> 2016-12-01T00:10:26 5528935 20161123230848928777000000 </DATA> 181 | <DATA> 2016-12-01T00:10:52 5528955 20161123234916869427000000 </DATA> 182 | <DATA> 2016-12-01T00:14:31 5530021 20161127221848669500000000 </DATA> 183 | </RETS> 184 | """ 185 | delimiter = _parse_delimiter(elem) 186 | 187 | columns_elem = _find_or_raise(elem, 'COLUMNS') 188 | columns = _parse_data_line(columns_elem, delimiter) 189 | 190 | data_elems = elem.findall('DATA') 191 | 192 | return (OrderedDict(zip_longest(columns, _parse_data_line(data, delimiter))) 193 | for data in data_elems) 194 | 195 | 196 | def _find_or_raise(elem: etree.Element, child_elem_name: str) -> etree.Element: 197 | child = elem.find(child_elem_name) 198 | if child is None: 199 | raise RetsParseError('Missing %s element' % child_elem_name) 200 | return child 201 | 202 | 203 | def _parse_data_line(elem: etree.Element, delimiter: str = '\t') -> Sequence[str]: 204 | # DATA elems using the COMPACT format and COLUMNS elems all start and end with delimiters 205 | return elem.text.split(delimiter)[1:-1] 206 | 207 | 208 | def _parse_delimiter(elem: etree.Element) -> str: 209 | delimiter_elem = elem.find('DELIMITER') 210 | if delimiter_elem is None: 211 | return '\t' 212 | return chr(int(delimiter_elem.get('value'))) 213 | -------------------------------------------------------------------------------- /tests/http/parsers/parse_object_test.py: -------------------------------------------------------------------------------- 1 | from requests.structures import CaseInsensitiveDict 2 | from rets import Object 3 | from rets.http.parsers.parse_object import parse_object, _guess_mime_type, _parse_mime_type 4 | from tests.utils import make_response 5 | 6 | 7 | def test_parse_object_single_location_true(): 8 | headers = { 9 | 'Content-Type': 'image/jpeg;charset=US-ASCII', 10 | 'Content-ID': '20170817170218718581000000', 11 | 'Object-ID': '1', 12 | 'Location': 'http://cdn.rets.com/1.jpg', 13 | 'Content-Description': 'anthem', 14 | 'Preferred': '1', 15 | } 16 | body = b'' 17 | response = make_response(200, body, headers) 18 | 19 | assert parse_object(response) == ( 20 | Object( 21 | mime_type='image/jpeg', 22 | content_id='20170817170218718581000000', 23 | description='anthem', 24 | object_id='1', 25 | url='http://cdn.rets.com/1.jpg', 26 | preferred=True, 27 | data=None, 28 | ), 29 | ) 30 | 31 | 32 | def test_parse_object_single_location_false(): 33 | headers = { 34 | 'Content-Type': 'image/jpeg;charset=US-ASCII', 35 | 'Content-ID': '20170817170218718581000000', 36 | 'Object-ID': '1', 37 | 'Content-Description': 'anthem', 38 | 'Preferred': '1', 39 | } 40 | body = b'binary content' 41 | response = make_response(200, body, headers) 42 | 43 | assert parse_object(response) == ( 44 | Object( 45 | mime_type='image/jpeg', 46 | content_id='20170817170218718581000000', 47 | description='anthem', 48 | object_id='1', 49 | url=None, 50 | preferred=True, 51 | data=b'binary content', 52 | ), 53 | ) 54 | 55 | 56 | def test_parse_object_not_found(): 57 | headers = { 
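# A missing object comes back as a text/xml error part with an empty Location and no body;
# parse_object is expected to skip it and return an empty tuple.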
'Content-Type': 'text/xml;charset=US-ASCII', 59 | 'Content-ID': '201708171702187185810000009999', 60 | 'Object-ID': '1', 61 | 'Location': '', 62 | } 63 | body = b'' 64 | response = make_response(200, body, headers) 65 | 66 | assert parse_object(response) == () 67 | 68 | 69 | def test_parse_object_multi_location_true(): 70 | headers = { 71 | 'Content-Type': 'multipart/parallel;boundary="FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr";' 72 | 'charset=US-ASCII', 73 | } 74 | body = ( 75 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 76 | b'\r\nContent-Type: image/jpeg' 77 | b'\r\nContent-ID: 20170817170218718581000000' 78 | b'\r\nObject-ID: 1' 79 | b'\r\nLocation: http://cdn.rets.com/1.jpg' 80 | b'\r\nContent-Description: anthem' 81 | b'\r\nPreferred: 1' 82 | b'\r\n\r\n' 83 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 84 | b'\r\nContent-Type: image/jpeg' 85 | b'\r\nContent-ID: 20170817170218718581000000' 86 | b'\r\nObject-ID: 2' 87 | b'\r\nLocation: http://cdn.rets.com/2.jpg' 88 | b'\r\nContent-Description: anthem2' 89 | b'\r\n\r\n' 90 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr--' 91 | b'\r\n' 92 | ) 93 | response = make_response(200, body, headers) 94 | 95 | assert parse_object(response) == ( 96 | Object( 97 | mime_type='image/jpeg', 98 | content_id='20170817170218718581000000', 99 | description='anthem', 100 | object_id='1', 101 | url='http://cdn.rets.com/1.jpg', 102 | preferred=True, 103 | data=None, 104 | ), 105 | Object( 106 | mime_type='image/jpeg', 107 | content_id='20170817170218718581000000', 108 | description='anthem2', 109 | object_id='2', 110 | url='http://cdn.rets.com/2.jpg', 111 | preferred=False, 112 | data=None, 113 | ), 114 | ) 115 | 116 | 117 | def test_parse_object_multi_location_false(): 118 | headers = { 119 | 'Content-Type': 'multipart/parallel;boundary="FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr";' 120 | 'charset=US-ASCII', 121 | } 122 | body = ( 123 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 124 | b'\r\nContent-Type: image/jpeg' 125 | b'\r\nContent-ID: 20170817170218718581000000' 126 | b'\r\nObject-ID: 1' 127 | b'\r\nContent-Description: anthem' 128 | b'\r\nPreferred: 1' 129 | b'\r\n' 130 | b'\r\nbinary content 1' 131 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 132 | b'\r\nContent-Type: image/jpeg' 133 | b'\r\nContent-ID: 20170817170218718581000000' 134 | b'\r\nObject-ID: 2' 135 | b'\r\nContent-Description: anthem2' 136 | b'\r\n' 137 | b'\r\nbinary content 2' 138 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr--' 139 | b'\r\n' 140 | ) 141 | response = make_response(200, body, headers) 142 | 143 | assert parse_object(response) == ( 144 | Object( 145 | mime_type='image/jpeg', 146 | content_id='20170817170218718581000000', 147 | description='anthem', 148 | object_id='1', 149 | url=None, 150 | preferred=True, 151 | data=b'binary content 1', 152 | ), 153 | Object( 154 | mime_type='image/jpeg', 155 | content_id='20170817170218718581000000', 156 | description='anthem2', 157 | object_id='2', 158 | url=None, 159 | preferred=False, 160 | data=b'binary content 2', 161 | ), 162 | ) 163 | 164 | 165 | def test_parse_object_no_encoding(): 166 | # Note: there is no charset in the content-type 167 | headers = { 168 | 'Content-Type': 'multipart/parallel;boundary="FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr"' 169 | } 170 | body = ( 171 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr' 172 | b'\r\nContent-Type: 
image/jpeg' 173 | b'\r\nContent-ID: 20170817170218718581000000' 174 | b'\r\nObject-ID: 1' 175 | b'\r\nLocation: http://cdn.rets.com/1.jpg' 176 | b'\r\nContent-Description: anthem' 177 | b'\r\nPreferred: 1' 178 | b'\r\n\r\n' 179 | b'\r\n--FLEX1t7l9O45tdFUw2e92ASD3qKPxB0lf0Wo7atUz9qlAFoQdBGpDr--' 180 | b'\r\n' 181 | ) 182 | response = make_response(200, body, headers) 183 | 184 | assert parse_object(response) == ( 185 | Object( 186 | mime_type='image/jpeg', 187 | content_id='20170817170218718581000000', 188 | description='anthem', 189 | object_id='1', 190 | url='http://cdn.rets.com/1.jpg', 191 | preferred=True, 192 | data=None, 193 | ), 194 | ) 195 | 196 | 197 | def test_parse_object_location_true_content_type_xml(): 198 | headers = { 199 | 'Content-Type': 'multipart/parallel; boundary=2ce97979.83bf.368b.86c2.cc9295f41e3d', 200 | } 201 | body = ( 202 | b'\r\n--2ce97979.83bf.368b.86c2.cc9295f41e3d' 203 | b'\r\nContent-ID: 8240151' 204 | b'\r\nObject-ID: 1' 205 | b'\r\nLocation: http://cdn.rets.com/1.jpg' 206 | b'\r\nContent-Description: Welcome Home!' 207 | b'\r\nContent-Type: text/xml' 208 | b'\r\n' 209 | b'\r\n\r\n\r\n' 210 | b'\r\n--2ce97979.83bf.368b.86c2.cc9295f41e3d--' 211 | b'\r\n' 212 | ) 213 | response = make_response(200, body, headers) 214 | 215 | assert parse_object(response) == ( 216 | Object( 217 | mime_type='image/jpeg', 218 | content_id='8240151', 219 | description='Welcome Home!', 220 | object_id='1', 221 | url='http://cdn.rets.com/1.jpg', 222 | preferred=False, 223 | data=None, 224 | ), 225 | ) 226 | 227 | 228 | def test_guess_mime_type(): 229 | # Can guess from URL extension 230 | assert 'image/jpeg' == _guess_mime_type('http://cdn.rets.com/1.jpg') 231 | assert 'image/png' == _guess_mime_type('http://cdn.rets.com/1.png') 232 | assert 'application/pdf' == _guess_mime_type('http://cdn.rets.com/1.pdf') 233 | assert None == _guess_mime_type('') 234 | 235 | 236 | def test_parse_mime_type(): 237 | # Can guess from content type 238 | assert 'image/jpeg' == _parse_mime_type('image/jpeg') 239 | assert 'image/jpeg' == _parse_mime_type('image/jpeg;charset=US-ASCII') 240 | assert None == _parse_mime_type('') 241 | -------------------------------------------------------------------------------- /rets/http/client.py: -------------------------------------------------------------------------------- 1 | from hashlib import md5 2 | from typing import Any, Mapping, Sequence, Union 3 | from urllib.parse import urljoin, urlsplit, urlunsplit, urlencode 4 | 5 | import requests 6 | from requests import Response 7 | from requests.auth import AuthBase, HTTPBasicAuth, HTTPDigestAuth 8 | 9 | from rets.http.parsers import ( 10 | parse_capability_urls, 11 | parse_metadata, 12 | parse_object, 13 | parse_search, 14 | parse_system, 15 | ) 16 | from rets.http.data import Object, Metadata, SearchResult, SystemMetadata 17 | from rets.errors import RetsApiError, RetsClientError 18 | 19 | 20 | class RetsHttpClient: 21 | 22 | def __init__(self, 23 | login_url: str, 24 | username: str = None, 25 | password: str = None, 26 | auth_type: str = 'digest', 27 | user_agent: str = 'rets-python/0.3', 28 | user_agent_password: str = '', 29 | rets_version: str = '1.7.2', 30 | capability_urls: str = None, 31 | cookie_dict: dict = None, 32 | use_get_method: bool = False, 33 | send_rets_ua_authorization: bool = True, 34 | ): 35 | self._user_agent = user_agent 36 | self._user_agent_password = user_agent_password 37 | self._rets_version = rets_version 38 | self._use_get_method = use_get_method 39 | 
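# use_get_method switches transactions from POST to GET with URL-encoded parameters, and
# send_rets_ua_authorization controls whether the RETS-UA-Authorization digest header is
# attached to every request; both flags are consumed in _http_request below.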
self._send_rets_ua_authorization = send_rets_ua_authorization 40 | 41 | splits = urlsplit(login_url) 42 | self._base_url = urlunsplit((splits.scheme, splits.netloc, '', '', '')) 43 | self._capabilities = capability_urls or { 44 | 'Login': splits.path, 45 | } 46 | 47 | # Authenticate using either the user agent auth header and (basic or digest) HTTP auth. 48 | # SFARMLS (San Francisco) uses both methods together. 49 | if username and password: 50 | self._http_auth = _get_http_auth(username, password, auth_type) 51 | else: 52 | self._http_auth = None 53 | 54 | # we use a session to keep track of cookies that are required for certain MLSes 55 | self._session = requests.Session() 56 | 57 | # The user may provide an optional cookie_dict argument, which will be used on first login. 58 | # When sending cookies (with a session_id) to the login url, the same cookie (session_id) 59 | # is returned, which (most likely) means no additional login is created. 60 | if cookie_dict: 61 | for name, value in cookie_dict.items(): 62 | self._session.cookies.set(name, value=value) 63 | 64 | # this session id is part of the rets standard for use with a user agent password 65 | self._rets_session_id = '' 66 | 67 | @property 68 | def user_agent(self) -> str: 69 | """ 70 | This header field contains information about the user agent originating the request. 71 | This is for statistical purposes, the tracing of protocol violations, and automated 72 | recognition of user agents for the sake of tailoring responses to avoid particular user 73 | agent limitations, as well as providing enhanced capabilities to some user-agents. All 74 | client requests MUST include this field. This is a standard HTTP header field as defined 75 | in RFC 2616. 76 | """ 77 | return self._user_agent 78 | 79 | @property 80 | def rets_version(self) -> str: 81 | """ 82 | The client MUST send the RETS-Version. The convention used is a ".." 83 | numbering scheme similar to the HTTP Version in Section 3.1 of RFC 2616. The version of a 84 | RETS message is indicated by a RETS-Version field in the header of the message. 85 | """ 86 | return 'RETS/' + self._rets_version 87 | 88 | @property 89 | def capability_urls(self) -> dict: 90 | return self._capabilities 91 | 92 | @property 93 | def cookie_dict(self) -> dict: 94 | """Keeps the last value in case of duplicate keys.""" 95 | cookie_d = {} 96 | for k, v in self._session.cookies.iteritems(): 97 | cookie_d[k] = v 98 | return cookie_d 99 | 100 | def login(self) -> dict: 101 | response = self._http_request(self._url_for('Login')) 102 | self._capabilities = parse_capability_urls(response) 103 | return self._capabilities 104 | 105 | def logout(self) -> None: 106 | self._http_request(self._url_for('Logout')) 107 | self._session = None 108 | 109 | def get_system_metadata(self) -> SystemMetadata: 110 | return parse_system(self._get_metadata('system')) 111 | 112 | def get_metadata(self, 113 | type_: str, 114 | resource: str = None, 115 | class_: str = None, 116 | metadata_id: str = '0', 117 | ) -> Sequence[Metadata]: 118 | if resource: 119 | id_ = ':'.join(filter(None, [resource, class_])) 120 | else: 121 | id_ = metadata_id 122 | 123 | try: 124 | return parse_metadata(self._get_metadata(type_, id_)) 125 | except RetsApiError as e: 126 | if e.reply_code in (20502, 20503): # No metadata exists. 127 | return () 128 | raise 129 | 130 | def _get_metadata(self, type_: str, metadata_id: str = '0') -> Response: 131 | """ 132 | :param type_: The type of metadata being requested. 
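For example, type_='RESOURCE' is sent as Type=METADATA-RESOURCE and type_='class' as Type=METADATA-CLASS, since the payload below upper-cases the value and prefixes it with METADATA-.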
The Type MUST begin with METADATA and 133 | may be one of the defined metadata types (see Section 11). 134 | 135 | :param metadata_id: If the last metadata_id is 0 (zero), then the request is for all Type 136 | metadata contained within that level; if the last metadata_id is '*', then the request 137 | is for all Type metadata contained within that level and all metadata Types contained 138 | within the requested Type. This means that for a metadata-id of METADATA-SYSTEM, for 139 | example, the server is expected to return all metadata. 140 | 141 | Note: The metadata_id for METADATA-SYSTEM and METADATA-RESOURCE must be 0 or *. 142 | """ 143 | payload = { 144 | 'Type': 'METADATA-' + type_.upper(), 145 | 'ID': metadata_id, 146 | 'Format': 'COMPACT', 147 | } 148 | return self._http_request(self._url_for('GetMetadata'), payload=payload) 149 | 150 | def search(self, 151 | resource: str, 152 | class_: str, 153 | query: str, 154 | select: str = None, 155 | count: int = 1, 156 | limit: int = None, 157 | offset: int = 1, 158 | restricted_indicator: str = None, 159 | standard_names: bool = False, 160 | query_type: str = 'DMQL2', 161 | format_: str = 'COMPACT-DECODED', 162 | ) -> SearchResult: 163 | """ 164 | The Search transaction requests that the server search one or more searchable databases 165 | and return the list of qualifying records. The body of the response contains the records 166 | matching the query, presented in the requested format. 167 | 168 | :param resource: The type of search to perform as discussed in Section 7.1 and defined 169 | in the Metadata (see section 11.2.2). 170 | 171 | :param class_: This parameter is set to a value that represents the class of data within 172 | the resource, taken from the Class metadata (section 11.3.1). If the resource has no 173 | classes, the class_ parameter will be ignored by the server and may be omitted by the 174 | client. If the client does include the class_ parameter for a classless search, the 175 | value should be the same as the resource in order to insure forward compatibility. 176 | 177 | :param query: 178 | 179 | :param count: If this argument is set to one '1', then a record-count is returned in the 180 | response in addition to the data. Note that on some servers this will cause the 181 | search to take longer since the count must be returned before any records are 182 | received. If this entry is set to two '2' then only a record-count is returned; no 183 | data is returned, but all matches are counted regardless of any Offset or Limit 184 | parameter. If the Count argument is not present or set to zero '0' then no record 185 | count is returned. 186 | 187 | :param limit: 188 | 189 | :param offset: 190 | 191 | :param restricted_indicator: 192 | 193 | :param standard_names: Queries may use either standard names or system names in the query 194 | (Section 7.7). If the client chooses to use standard names, it must indicate this 195 | using the standard_names argument. If this entry is set to '0' or is not present the 196 | field names passed in the search are the SystemNames, as defined in the metadata. If 197 | this entry is set to '1' then the StandardNames are used for the field names passed 198 | in the search. The StandardName designation applies to all names used in the query: 199 | SearchType, Class, Query and Select. 200 | 201 | :param format_: 'COMPACT' means a field list followed by a delimited set of the 202 | data fields . 
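In other words, a COMPACT response carries a single COLUMNS element naming the returned fields and one tab-delimited DATA element per matching record, which parse_search decodes into a tuple of dicts.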
'COMPACT-DECODED' is the same as COMPACT except the data for any 203 | field with an interpretation of Lookup, LookupMulti, LookupBitString or LookupBitMask, 204 | is returned in a fully-decoded format using the LongValue. See Section 13 for more 205 | information on the COMPACT formats and section 11.4.3 for more information on the 206 | Lookup types. 'STANDARD-XML' means an XML presentation of the data in the format 207 | defined by the RETS Data XML DTD. Servers MUST support all formats. If the format is 208 | not specified, the server MUST return STANDARD-XML. 209 | """ 210 | raw_payload = { 211 | 'SearchType': resource, 212 | 'Class': class_, 213 | 'Query': query, 214 | 'QueryType': query_type, 215 | 'Select': select, 216 | 'Count': count, 217 | 'Limit': limit or 'NONE', 218 | 'Offset': offset, 219 | 'RestrictedIndicator': restricted_indicator, 220 | 'StandardNames': int(standard_names), 221 | 'Format': format_, 222 | } 223 | # None values indicate that the argument should be omitted from the request 224 | payload = {k: v for k, v in raw_payload.items() if v is not None} 225 | 226 | response = self._http_request(self._url_for('Search'), payload=payload) 227 | return parse_search(response) 228 | 229 | def get_object(self, 230 | resource: str, 231 | object_type: str, 232 | resource_keys: Union[str, Mapping[str, Any], Sequence[str]], 233 | media_types: Union[str, Sequence[str]] = '*/*', 234 | location: bool = False, 235 | ) -> Sequence[Object]: 236 | """ 237 | The GetObject transaction is used to retrieve structured information related to known 238 | system entities. It can be used to retrieve multimedia files and other key-related 239 | information. Objects requested and returned from this transaction are requested and 240 | returned as MIME media types. The message body for successful retrievals contains only 241 | the objects in the specified MIME media type. Error responses follow the normal response 242 | format (section 3.9). 243 | 244 | :param resource: A resource defined in the metadata dictionary (see Section 11.2.2). The 245 | resource from which the object should be retrieved is specified by this entry. For 246 | more information see 5.9. The resource must be a resource defined in the metadata 247 | (section 11.4.1). 248 | 249 | :param object_type: The object type as defined in the metadata (see Section 11.4.1). The 250 | grouping category to which the object belongs. The type must be an ObjectType defined 251 | in the Object metadata for this Resource. For more information see section 11.4.1. 252 | 253 | :param resource_keys: A single value or a list-like or dict-like container specifying the 254 | entities of the resource to retrieve objects for, where the entity is given by the 255 | KeyField of the resource. If the resource_ids is a value or is list-like, then all 256 | objects corresponding to the entities are returned by default. If resource_ids is 257 | dict-like, then it is a mapping from the resource entity to an object_id_list. 258 | 259 | The object_id_list can take the values: '*', 0, or an array of positive ids. If it is 260 | '*', then all objects are returns. If it is 0, then the preferred object is returned. 261 | Otherwise, the ids will refer to the sequential index of the objects beginning with 1. 262 | 263 | :param media_types: A single or list-like container of acceptable media types for the 264 | server to return. 
If media_types is like-like, then the ordering specifies the 265 | preference of the media types to return, with the first being the most desirable. If 266 | the server is unable to provide the requested media type, it should return a 406 267 | Not Acceptable status, or if no objects exist for any media type then the server 268 | should return a 404 Not Found. 269 | 270 | :param location: Flag to indicate whether the object or a URL to the object should be 271 | returned. If location is set to True, it is up to the server to support this 272 | functionality and the lifetime of the returned URL is not given by the RETS 273 | specification. 274 | """ 275 | headers = { 276 | 'Accept': _build_accepted_media_types(media_types), 277 | } 278 | payload = { 279 | 'Resource': resource, 280 | 'Type': object_type, 281 | 'ID': _build_entity_object_ids(resource_keys), 282 | 'Location': int(location), 283 | } 284 | response = self._http_request(self._url_for('GetObject'), headers=headers, payload=payload) 285 | return parse_object(response) 286 | 287 | def _url_for(self, transaction: str) -> str: 288 | try: 289 | url = self._capabilities[transaction] 290 | except KeyError: 291 | raise RetsClientError('No URL found for transaction %s' % transaction) 292 | return urljoin(self._base_url, url) 293 | 294 | def _http_request(self, url: str, headers: dict = None, payload: dict = None) -> Response: 295 | if not self._session: 296 | raise RetsClientError('Session not instantiated. Call .login() first') 297 | 298 | request_headers = { 299 | **(headers or {}), 300 | 'User-Agent': self.user_agent, 301 | 'RETS-Version': self.rets_version, 302 | } 303 | if self._send_rets_ua_authorization: 304 | request_headers['RETS-UA-Authorization'] = self._rets_ua_authorization() 305 | 306 | if self._use_get_method: 307 | if payload: 308 | url = '%s?%s' % (url, urlencode(payload)) 309 | response = self._session.get(url, auth=self._http_auth, headers=request_headers) 310 | else: 311 | response = self._session.post(url, auth=self._http_auth, headers=request_headers, data=payload) 312 | 313 | response.raise_for_status() 314 | self._rets_session_id = self._session.cookies.get('RETS-Session-ID', '') 315 | return response 316 | 317 | def _rets_ua_authorization(self) -> str: 318 | return 'Digest ' + self._user_agent_auth_digest() 319 | 320 | def _user_agent_auth_digest(self) -> str: 321 | user_password = '%s:%s' % (self.user_agent, self._user_agent_password) 322 | a1 = md5(user_password.encode()).hexdigest() 323 | 324 | digest_values = '%s::%s:%s' % (a1, self._rets_session_id, self.rets_version) 325 | return md5(digest_values.encode()).hexdigest() 326 | 327 | 328 | def _get_http_auth(username: str, password: str, auth_type: str) -> AuthBase: 329 | if auth_type == 'basic': 330 | return HTTPBasicAuth(username, password) 331 | if auth_type == 'digest': 332 | return HTTPDigestAuth(username, password) 333 | raise RetsClientError('unknown auth type %s' % auth_type) 334 | 335 | 336 | def _build_entity_object_ids(entities: Union[str, Mapping[str, Any], Sequence[str]]) -> str: 337 | """ 338 | Builds the string of object ids as required by the GetObject transaction request. 
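For example, a resource_keys mapping of {'ABC123': (1, 2)} (an illustrative key) is encoded as 'ABC123:1:2', while a plain sequence such as ['ABC123', 'DEF456'] expands to 'ABC123:*,DEF456:*'.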
See 339 | section 5.3 for the full definition: 340 | 341 | ID ::= resource-set *(, resource-set) 342 | resource-set ::= resource-entity [: object-id-list] 343 | resource-entity ::= 1*ALPHANUM 344 | object-id-list ::= * | object-id *(: object-id) 345 | object-id ::= 1*5DIGIT 346 | """ 347 | if isinstance(entities, str): 348 | return _build_entity_object_ids((entities,)) 349 | elif isinstance(entities, Sequence): 350 | return _build_entity_object_ids({e: '*' for e in entities}) 351 | elif not isinstance(entities, Mapping): 352 | raise RetsClientError('Invalid entities argument') 353 | 354 | def _build_object_ids(object_ids: Any) -> str: 355 | if object_ids in ('*', 0, '0'): 356 | return str(object_ids) 357 | elif isinstance(object_ids, Sequence): 358 | return ':'.join(str(o) for o in object_ids) 359 | else: 360 | raise RetsClientError('Invalid entities argument') 361 | 362 | return ','.join('%s:%s' % (entity, _build_object_ids(object_ids)) 363 | for entity, object_ids in entities.items()) 364 | 365 | 366 | def _build_accepted_media_types(media_types: Union[str, Sequence[str]]) -> str: 367 | """ 368 | Builds the Accept header of media types as required by the GetObject transaction request. 369 | The qvalue is used to specify the desirability of a given media type with 1 being the most 370 | desirable, 0 being the least, and a range in between. 371 | See section 5.1 for the full definition: 372 | 373 | Accept ::= type / subtype [; parameter] *(, type / subtype [; parameter]) 374 | type ::= * | <type-name> 375 | subtype ::= * | <subtype-name> 376 | parameter ::= q = <qvalue> 377 | 378 | A complete list of media types is available at http://www.iana.org/assignments/media-types. 379 | """ 380 | if isinstance(media_types, str): 381 | return media_types 382 | elif not isinstance(media_types, Sequence): 383 | raise RetsClientError('Invalid media types argument') 384 | 385 | n = float(len(media_types)) 386 | return ','.join('%s;q=%.4f' % (types, 1 - i / n) for i, types in enumerate(media_types)) 387 | --------------------------------------------------------------------------------
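The client and parsers above compose into a small end-to-end flow: log in to discover the capability URLs, run a Search, optionally fetch related objects, and log out. The sketch below is illustrative only; the endpoint, credentials, resource, class and object-type names, and the DMQL2 field are placeholders rather than values taken from this repository.

from rets.http import RetsHttpClient

client = RetsHttpClient(
    login_url='https://example-mls.com/rets/Login',  # hypothetical login URL
    username='user',
    password='secret',
    user_agent_password='ua-secret',  # only needed when the server enforces RETS-UA-Authorization
)
client.login()  # populates the capability URLs from the login reply
try:
    result = client.search(
        resource='Property',          # resource/class names come from the server's metadata
        class_='RES',
        query='(ListPrice=300000+)',  # DMQL2 query; the field name is illustrative
        limit=10,
    )
    for row in result.data or ():
        print(row)
    # location=True asks the server for URLs instead of inline binary data
    for obj in client.get_object('Property', 'Photo', resource_keys='ABC123', location=True):
        print(obj.object_id, obj.url)
finally:
    client.logout()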