├── .cov └── .gitignore ├── .coveragerc ├── .dockerignore ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.rst ├── codecov.yml ├── docker-compose.yml ├── examples ├── .gitignore ├── minimal_scrapy_project │ ├── project │ │ ├── __init__.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ ├── first_spider.py │ │ │ └── second_spider.py │ └── scrapy.cfg ├── scrapy_project │ ├── project │ │ ├── __init__.py │ │ ├── exporters.py │ │ ├── items.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ ├── first_spider.py │ │ │ └── second_spider.py │ ├── requirements.txt │ └── scrapy.cfg └── website │ ├── category1.html │ ├── category2.html │ ├── css │ ├── bootstrap.css │ ├── bootstrap.min.css │ ├── shop-homepage.css │ └── shop-item.css │ ├── fonts │ ├── glyphicons-halflings-regular.eot │ ├── glyphicons-halflings-regular.svg │ ├── glyphicons-halflings-regular.ttf │ ├── glyphicons-halflings-regular.woff │ └── glyphicons-halflings-regular.woff2 │ ├── images │ ├── 320x150.png │ └── 800x300.png │ ├── index.html │ ├── item1.html │ ├── item2.html │ ├── item3.html │ ├── item4.html │ ├── item5.html │ ├── item6.html │ ├── item7.html │ └── js │ ├── bootstrap.js │ ├── bootstrap.min.js │ └── jquery.js ├── logs └── .gitignore ├── pytest.ini ├── requirements.txt ├── scrapy_rss ├── VERSION ├── __init__.py ├── elements.py ├── exceptions.py ├── exporters.py ├── items.py ├── meta │ ├── __init__.py │ ├── attribute.py │ ├── element.py │ ├── item.py │ └── nscomponent.py ├── pipelines.py └── utils.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── expected_rss │ ├── empty_feed.rss │ ├── empty_feed_with_categories.rss │ ├── empty_feed_without_generator.rss │ ├── full_empty_feed.rss │ ├── item_with_default_nses.rss │ ├── item_with_default_nses2.rss │ ├── item_with_default_nses3.rss │ ├── item_with_enclosure.rss │ ├── item_with_guid.rss │ ├── item_with_multiple_categories.rss │ ├── 
item_with_non_unique_ns.rss │ ├── item_with_non_unique_ns2.rss │ ├── item_with_non_unique_ns3.rss │ ├── item_with_non_unique_ns4.rss │ ├── item_with_non_unique_ns5.rss │ ├── item_with_same_ns_prefixes.rss │ ├── item_with_same_ns_prefixes2.rss │ ├── item_with_same_ns_prefixes3.rss │ ├── item_with_single_category.rss │ ├── item_with_unicode.rss │ ├── item_with_unique_ns.rss │ ├── item_with_unique_ns2.rss │ ├── item_with_unique_ns3.rss │ ├── minimal_item.rss │ ├── minimal_item2.rss │ └── simple_item.rss ├── requirements.txt ├── test_elements.py ├── test_exporter.py ├── test_feed_item.py ├── test_import.py ├── test_nscomponent_name.py └── utils.py ├── tox-in-docker.py └── tox.ini /.cov/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = scrapy_rss 4 | omit = 5 | setup.py 6 | tests/* 7 | 8 | [paths] 9 | source = 10 | scrapy_rss 11 | /home/*/scrapy_rss/scrapy_rss 12 | 13 | [html] 14 | directory = coverage_report 15 | 16 | [xml] 17 | output = coverage.xml 18 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | **/*.pyc 3 | .idea 4 | build 5 | dist 6 | .git 7 | .tox 8 | .cov 9 | .coverage 10 | *.egg-info 11 | _trial_temp 12 | .cache 13 | update*.sh 14 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Python package scrapy_rss testing 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - develop 7 | - master 8 | paths-ignore: 9 | - 'scrapy_rss/VERSION' 10 | jobs: 11 | main-pythons-tests: 12 | 
strategy: 13 | fail-fast: false 14 | matrix: 15 | env: 16 | - platform: 'ubuntu-20.04' 17 | python-version: 2.7 18 | - platform: 'ubuntu-20.04' 19 | python-version: 3.6 20 | - platform: 'ubuntu-22.04' 21 | python-version: 3.7 22 | - platform: 'ubuntu-22.04' 23 | python-version: 3.8 24 | - platform: 'ubuntu-latest' 25 | python-version: 3.9 26 | - platform: 'ubuntu-latest' 27 | python-version: '3.10' 28 | - platform: 'ubuntu-latest' 29 | python-version: '3.11' 30 | - platform: 'ubuntu-latest' 31 | python-version: '3.12' 32 | - platform: 'ubuntu-latest' 33 | python-version: '3.13' 34 | runs-on: ${{ matrix.env.platform }} 35 | steps: 36 | - uses: actions/checkout@v4 37 | - name: Set up Python ${{ matrix.env.python-version }} from the repository 38 | if: ${{ matrix.env.python-version == '2.7' }} 39 | run: | 40 | sudo update-alternatives --install /usr/bin/python python /usr/bin/python2 1 41 | curl https://bootstrap.pypa.io/pip/2.7/get-pip.py --output get-pip.py 42 | python get-pip.py 43 | - name: Set up Python ${{ matrix.env.python-version }} using actions 44 | if: ${{ matrix.env.python-version != '2.7' }} 45 | uses: actions/setup-python@v5 46 | with: 47 | python-version: ${{ matrix.env.python-version }} 48 | - name: Install dependencies 49 | run: | 50 | python -m pip install --upgrade pip 51 | python -m pip install tox tox-gh-actions 52 | - name: Test with tox 53 | run: tox 54 | - uses: actions/upload-artifact@v4 55 | with: 56 | name: coverage-${{ strategy.job-index }} 57 | path: .cov/.coverage* 58 | retention-days: 1 59 | include-hidden-files: true 60 | coverage: 61 | runs-on: ubuntu-latest 62 | name: Coverage combining and reporting 63 | needs: main-pythons-tests 64 | steps: 65 | - uses: actions/checkout@v4 66 | - uses: actions/download-artifact@v4 67 | with: 68 | pattern: coverage-* 69 | path: .cov 70 | merge-multiple: true 71 | - name: Set up Python 3.11 72 | uses: actions/setup-python@v5 73 | with: 74 | python-version: '3.11' 75 | - name: Install dependencies 76 | 
run: | 77 | python -m pip install --upgrade pip 78 | python -m pip install 'coverage<5' codecov 79 | - name: Combine .coverage 80 | run: | 81 | python -m coverage combine .cov/.coverage* 82 | python -m coverage xml 83 | python -m coverage report 84 | - name: Upload coverage to Codecov 85 | uses: codecov/codecov-action@v5 86 | env: 87 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 88 | with: 89 | fail_ci_if_error: true 90 | 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .idea 3 | build 4 | dist 5 | .tox 6 | *.pyc 7 | *.cover 8 | coverage_report/* 9 | .coverage 10 | *coverage.xml 11 | *.egg-info 12 | .python-version 13 | _trial_temp 14 | .cache 15 | update*.sh 16 | *.log 17 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.10 AS py27 2 | ARG UNAME 3 | ARG USERID 4 | ARG GROUPID 5 | ARG WORKDIR 6 | RUN apk update && apk add build-base libffi-dev openssl-dev python2 python2-dev py2-pip py2-lxml && \ 7 | addgroup -g $GROUPID $UNAME && \ 8 | adduser -u $USERID -S -s /bin/sh $UNAME $UNAME && \ 9 | pip install --disable-pip-version-check tox 10 | USER $UNAME 11 | WORKDIR $WORKDIR 12 | 13 | 14 | FROM python:3.3-alpine AS py33 15 | ARG UNAME 16 | ARG USERID 17 | ARG GROUPID 18 | ARG WORKDIR 19 | RUN apk update && apk add build-base libffi-dev openssl-dev libxml2 libxml2-dev libxslt-dev && \ 20 | addgroup -g $GROUPID $UNAME && \ 21 | adduser -u $USERID -S -s /bin/sh $UNAME $UNAME && \ 22 | pip3 install --disable-pip-version-check -U pip==10.0.1 incremental==17.5.0 setuptools==39.2.0 wheel==0.29.0 cffi==1.12.3 pycparser==2.14 && \ 23 | pip3 install --disable-pip-version-check tox==2.9.1 virtualenv==15.2.0 py==1.4.34 packaging==16.8 Automat==0.7.0 PyDispatcher==2.0.5 PyHamcrest==1.8.5 
Twisted==17.9.0 asn1crypto==0.24.0 attrs==18.2.0 constantly==15.1.0 coverage==4.5.3 cryptography==2.1.4 cssselect==1.0.1 frozendict==1.2 hyperlink==19.0.0 idna==2.7 lxml==4.2.6 nose==1.3.7 parsel==1.2.0 py==1.4.34 pyOpenSSL==16.2.0 pyasn1==0.4.5 pyasn1-modules==0.2.5 pytest==3.2.5 pytest-cov==2.5.1 python-dateutil==2.8.2 queuelib==1.5.0 service-identity==14.0.0 six==1.12.0 w3lib==1.19.0 xmlunittest==0.5.0 zope.interface==4.4.3 24 | USER $UNAME 25 | WORKDIR $WORKDIR 26 | 27 | 28 | FROM ubuntu:trusty AS py34 29 | ADD https://bootstrap.pypa.io/pip/3.4/get-pip.py /get-pip.py 30 | ENV DEBIAN_FRONTEND=noninteractive 31 | ARG UNAME 32 | ARG USERID 33 | ARG GROUPID 34 | ARG WORKDIR 35 | RUN apt-get update && \ 36 | locale-gen en_US.UTF-8 && \ 37 | apt-get install -y libffi-dev libssl-dev python3 python3-dev python3-lxml python3-dateutil && \ 38 | groupadd -g $GROUPID -o $UNAME && \ 39 | useradd -m -u $USERID -g $GROUPID -o -s /bin/bash $UNAME 40 | ENV LANG=en_US.UTF-8 41 | ENV LANGUAGE=en_US:en 42 | ENV LC_ALL=en_US.UTF-8 43 | RUN python3 get-pip.py --no-setuptools --no-wheel "pip < 19.2" && rm -f get-pip.py && \ 44 | pip3 install --disable-pip-version-check incremental==17.5.0 setuptools==39.2.0 && \ 45 | pip3 install --disable-pip-version-check tox==3.8.3 virtualenv==16.0.0 filelock==3.0.12 py==1.8.0 pluggy==0.12.0 pyparsing==2.4.1.1 packaging==19.0 importlib-resources==1.0.2 importlib-metadata==0.18 pathlib2==2.3.4 Automat==0.7.0 apipkg==1.5 PyDispatcher==2.0.5 PyHamcrest==1.9.0 Twisted==19.2.0 asn1crypto==0.24.0 atomicwrites==1.3.0 attrs==19.1.0 cffi==1.12.3 constantly==15.1.0 coverage==4.5.3 cryptography==2.7 cssselect==1.0.3 frozendict==1.2 hyperlink==19.0.0 idna==2.8 more-itertools==7.2.0 parsel==1.5.1 pyOpenSSL==19.0.0 pyasn1==0.4.5 pyasn1-modules==0.2.5 pytest==4.6.4 pytest-cov==2.7.1 queuelib==1.5.0 scandir==1.10.0 service-identity==18.1.0 six==1.12.0 w3lib==1.20.0 wcwidth==0.1.7 xmlunittest==0.5.0 zipp==0.5.2 zope.interface==4.6.0 46 | USER $UNAME 47 | WORKDIR 
$WORKDIR 48 | 49 | 50 | FROM alpine:3.5 AS py35 51 | ARG UNAME 52 | ARG USERID 53 | ARG GROUPID 54 | ARG WORKDIR 55 | RUN apk update && \ 56 | apk add build-base python3 python3-dev openssl-dev py3-lxml py3-cryptography py3-cffi py3-dateutil py3-pytest && \ 57 | addgroup -g $GROUPID $UNAME && \ 58 | adduser -u $USERID -S -s /bin/sh $UNAME $UNAME 59 | RUN pip3 install --disable-pip-version-check tox 60 | USER $UNAME 61 | WORKDIR $WORKDIR 62 | 63 | 64 | FROM alpine:3.9 AS py36 65 | ARG UNAME 66 | ARG USERID 67 | ARG GROUPID 68 | ARG WORKDIR 69 | RUN apk update && \ 70 | apk add build-base python3 python3-dev openssl-dev py3-lxml py3-cryptography py3-cffi libffi-dev py3-dateutil py3-pytest && \ 71 | addgroup -g $GROUPID $UNAME && \ 72 | adduser -u $USERID -S -s /bin/sh $UNAME $UNAME 73 | RUN pip3 install --disable-pip-version-check tox 74 | USER $UNAME 75 | WORKDIR $WORKDIR 76 | 77 | 78 | FROM alpine:3.19 AS py3 79 | ARG UNAME 80 | ARG USERID 81 | ARG GROUPID 82 | ARG WORKDIR 83 | RUN apk update && \ 84 | apk add curl git bash build-base libffi-dev openssl-dev bzip2-dev zlib-dev xz-dev readline-dev sqlite-dev && \ 85 | addgroup -g $GROUPID $UNAME && \ 86 | adduser -u $USERID -S -s /bin/bash $UNAME $UNAME && \ 87 | printf $'#!/bin/bash\n\ 88 | default=(echo "Nothing to execute")\n\ 89 | str=$(printf \'"%%s" \' "${@:-${default[@]}}")\n\ 90 | /bin/bash -lc "$str"\n' > /bin/runcmd && \ 91 | chmod +x /bin/runcmd 92 | USER $UNAME 93 | WORKDIR $WORKDIR 94 | SHELL ["/bin/bash", "-lc"] 95 | RUN git clone https://github.com/pyenv/pyenv.git ~/.pyenv && \ 96 | cd ~/.pyenv && src/configure && make -C src && \ 97 | echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.profile && \ 98 | echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.profile && \ 99 | echo 'eval "$(pyenv init --path)"' >> ~/.profile && \ 100 | echo 'eval "$(pyenv init -)"' >> ~/.profile && \ 101 | source ~/.profile && \ 102 | for v in 3.6 3.7 3.8 3.9 3.10 3.11 3.12 3.13; do pyenv install "$v:latest"; done && \ 103 | pyenv 
versions --bare | tee ~/.pyenv/version .python-version && \ 104 | for v in 3.6 3.7 3.8 3.9 3.10 3.11 3.12 3.13; do pip$v install -U pip; done && \ 105 | pip3.9 install tox 106 | ENTRYPOINT ["/bin/runcmd"] 107 | CMD ["echo", "py3 is built"] 108 | 109 | 110 | FROM fedora:40 AS py314 111 | ADD https://bootstrap.pypa.io/get-pip.py /get-pip.py 112 | ARG UNAME 113 | ARG USERID 114 | ARG GROUPID 115 | ARG WORKDIR 116 | RUN dnf -y update && \ 117 | dnf -y install make automake gcc gcc-c++ kernel-devel gnupg ca-certificates libffi-devel libxml2-devel libxslt-devel python3.14 python3.14-devel && \ 118 | python3.13 /get-pip.py && rm -f /get-pip.py && \ 119 | groupadd -g $GROUPID -o $UNAME && \ 120 | useradd -m -u $USERID -g $GROUPID -s /bin/bash $UNAME 121 | RUN pip install --disable-pip-version-check tox 122 | USER $UNAME 123 | WORKDIR $WORKDIR 124 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) scrapy_rss developers. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of Scrapy nor the names of its contributors may be used 15 | to endorse or promote products derived from this software without 16 | specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE 3 | include MANIFEST.in 4 | include scrapy_rss/VERSION 5 | include requirements.txt 6 | recursive-include tests * 7 | global-exclude __pycache__ *.py[cod] -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | scrapy_rss 3 | ========== 4 | 5 | .. image:: https://img.shields.io/pypi/v/scrapy-rss.svg?style=flat-square 6 | :target: https://pypi.python.org/pypi/scrapy_rss 7 | :alt: PyPI Version 8 | 9 | .. image:: https://img.shields.io/pypi/wheel/scrapy-rss.svg?style=flat-square 10 | :target: https://pypi.python.org/pypi/scrapy_rss 11 | :alt: Wheel Status 12 | 13 | .. image:: https://github.com/woxcab/scrapy_rss/actions/workflows/tests.yml/badge.svg?branch=master 14 | :target: https://github.com/woxcab/scrapy_rss/actions 15 | :alt: Testing status 16 | 17 | .. 
image:: https://img.shields.io/codecov/c/github/woxcab/scrapy_rss/master.svg?style=flat-square 18 | :target: http://codecov.io/github/woxcab/scrapy_rss?branch=master 19 | :alt: Coverage report 20 | 21 | .. image:: https://img.shields.io/pypi/pyversions/scrapy-rss.svg?style=flat-square 22 | :target: https://pypi.python.org/pypi/scrapy_rss 23 | :alt: Supported python versions 24 | 25 | 26 | Tools to easy generate `RSS feed `_ 27 | that contains each scraped item using `Scrapy framework `_. 28 | 29 | 30 | Table of Contents 31 | ================= 32 | * `Installation <#installation>`__ 33 | * `How To Use <#how-to-use>`__ 34 | 35 | * `Configuration <#configuration>`__ 36 | * `Optional Additional Customization <#feed-channel-elements-customization-optionally>`__ 37 | * `Usage <#usage>`__ 38 | 39 | * `Basic usage <#basic-usage>`__ 40 | * `RssItem derivation and namespaces <#rssitem-derivation-and-namespaces>`__ 41 | 42 | * `Scrapy Project Examples <#scrapy-project-examples>`__ 43 | 44 | 45 | `Installation `_ 46 | ========================================================== 47 | * Install :code:`scrapy_rss` using pip 48 | 49 | .. code:: bash 50 | 51 | pip install scrapy_rss 52 | 53 | or using pip for the specific interpreter, e.g.: 54 | 55 | .. code:: bash 56 | 57 | pip3 install scrapy_rss 58 | 59 | * or using setuptools directly: 60 | 61 | .. code:: bash 62 | 63 | cd path/to/root/of/scrapy_rss 64 | python setup.py install 65 | 66 | or using setuptools for specific interpreter, e.g.: 67 | 68 | .. code:: bash 69 | 70 | cd path/to/root/of/scrapy_rss 71 | python3 setup.py install 72 | 73 | 74 | How To Use 75 | ========== 76 | 77 | Configuration 78 | ------------- 79 | 80 | Add parameters to the Scrapy project settings (`settings.py` file) 81 | or to the :code:`custom_settings` attribute of the spider: 82 | 83 | 1. Add item pipeline that export items to rss feed: 84 | 85 | .. code:: python 86 | 87 | ITEM_PIPELINES = { 88 | # ... 
89 | 'scrapy_rss.pipelines.RssExportPipeline': 900, # or another priority 90 | # ... 91 | } 92 | 93 | 94 | 2. Add required feed parameters: 95 | 96 | FEED_FILE 97 | the absolute or relative file path where the result RSS feed will be saved. 98 | For example, :code:`feed.rss` or :code:`output/feed.rss`. 99 | FEED_TITLE 100 | the name of the channel (feed), 101 | FEED_DESCRIPTION 102 | the phrase or sentence that describes the channel (feed), 103 | FEED_LINK 104 | the URL to the HTML website corresponding to the channel (feed) 105 | 106 | .. code:: python 107 | 108 | FEED_FILE = 'path/to/feed.rss' 109 | FEED_TITLE = 'Some title of the channel' 110 | FEED_LINK = 'http://example.com/rss' 111 | FEED_DESCRIPTION = 'About channel' 112 | 113 | 114 | Feed (Channel) Elements Customization [optionally] 115 | -------------------------------------------------- 116 | 117 | If you want to change other channel parameters (such as language, copyright, managing_editor, 118 | webmaster, pubdate, last_build_date, category, generator, docs, ttl) 119 | then define your own exporter that's inherited from :code:`RssItemExporter` class, for example: 120 | 121 | .. code:: python 122 | 123 | from scrapy_rss.exporters import RssItemExporter 124 | 125 | class MyRssItemExporter(RssItemExporter): 126 | def __init__(self, *args, **kwargs): 127 | kwargs['generator'] = kwargs.get('generator', 'Special generator') 128 | kwargs['language'] = kwargs.get('language', 'en-us') 129 | super(MyRssItemExporter, self).__init__(*args, **kwargs) 130 | 131 | And add :code:`FEED_EXPORTER` parameter to the Scrapy project settings 132 | or to the :code:`custom_settings` attribute of the spider: 133 | 134 | .. code:: python 135 | 136 | FEED_EXPORTER = 'myproject.exporters.MyRssItemExporter' 137 | 138 | 139 | Usage 140 | ----- 141 | Basic usage 142 | ^^^^^^^^^^^ 143 | 144 | Declare your item directly as RssItem(): 145 | 146 | .. 
code:: python 147 | 148 | import scrapy_rss 149 | 150 | item1 = scrapy_rss.RssItem() 151 | 152 | Or use predefined item class :code:`RssedItem` with RSS field named as :code:`rss` 153 | that's instance of :code:`RssItem`: 154 | 155 | .. code:: python 156 | 157 | import scrapy 158 | import scrapy_rss 159 | 160 | class MyItem(scrapy_rss.RssedItem): 161 | field1 = scrapy.Field() 162 | field2 = scrapy.Field() 163 | # ... 164 | 165 | item2 = MyItem() 166 | 167 | 168 | Set/get item fields. Case sensitive attributes of :code:`RssItem()` are appropriate to RSS elements. 169 | Attributes of RSS elements are case sensitive too. 170 | If the editor allows autocompletion then it suggests attributes for instances of :code:`RssedItem` and :code:`RssItem`. 171 | It's allowed to set **any** subset of RSS elements (e.g. title only). For example: 172 | 173 | .. code:: python 174 | 175 | from datetime import datetime 176 | 177 | item1.title = 'RSS item title' # set value of element 178 | title = item1.title.title # get value of <title> element 179 | item1.description = 'description' 180 | 181 | item1.guid = 'item identifier' 182 | item1.guid.isPermaLink = True # set value of attribute isPermalink of <guid> element, 183 | # isPermaLink is False by default 184 | is_permalink = item1.guid.isPermaLink # get value of attribute isPermalink of <guid> element 185 | guid = item1.guid.guid # get value of element <guid> 186 | 187 | item1.category = 'single category' 188 | category = item1.category 189 | item1.category = ['first category', 'second category'] 190 | first_category = item1.category[0].category # get value of the element <category> with multiple values 191 | all_categories = [cat.category for cat in item1.category] 192 | 193 | # direct attributes setting 194 | item1.enclosure.url = 'http://example.com/file' 195 | item1.enclosure.length = 0 196 | item1.enclosure.type = 'text/plain' 197 | 198 | # or dict based attributes setting 199 | item1.enclosure = {'url': 
'http://example.com/file', 'length': 0, 'type': 'text/plain'} 200 | item1.guid = {'guid': 'item identifier', 'isPermaLink': True} 201 | 202 | item1.pubDate = datetime.now() # correctly works with Python' datetimes 203 | 204 | 205 | item2.rss.title = 'Item title' 206 | item2.rss.guid = 'identifier' 207 | item2.rss.enclosure = {'url': 'http://example.com/file', 'length': 0, 'type': 'text/plain'} 208 | 209 | 210 | All allowed elements are listed in the `scrapy_rss/items.py <https://github.com/woxcab/scrapy_rss/blob/master/scrapy_rss/items.py>`_. 211 | All allowed attributes of each element with constraints and default values 212 | are listed in the `scrapy_rss/elements.py <https://github.com/woxcab/scrapy_rss/blob/master/scrapy_rss/elements.py>`_. 213 | Also you can read `RSS specification <http://www.rssboard.org/rss-specification>`_ for more details. 214 | 215 | :code:`RssItem` derivation and namespaces 216 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 217 | 218 | You can extend RssItem to add new XML fields that can be namespaced or not. 219 | You can specify namespaces in an attribute and/or an element constructors. 220 | Namespace prefix can be specified in the attribute/element name 221 | using double underscores as delimiter (:code:`prefix__name`) 222 | or in the attribute/element constructor using :code:`ns_prefix` argument. 223 | Namespace URI can be specified using :code:`ns_uri` argument of the constructor. 224 | 225 | .. 
code:: python 226 | 227 | from scrapy_rss.meta import ItemElementAttribute, ItemElement 228 | from scrapy_rss.items import RssItem 229 | 230 | class Element0(ItemElement): 231 | # attributes without special namespace 232 | attr0 = ItemElementAttribute(is_content=True, required=True) 233 | attr1 = ItemElementAttribute() 234 | 235 | class Element1(ItemElement): 236 | # attribute "prefix2:attr2" with namespace xmlns:prefix2="id2" 237 | attr2 = ItemElementAttribute(ns_prefix="prefix2", ns_uri="id2") 238 | 239 | # attribute "prefix3:attr3" with namespace xmlns:prefix3="id3" 240 | prefix3__attr3 = ItemElementAttribute(ns_uri="id3") 241 | 242 | # attribute "prefix4:attr4" with namespace xmlns:prefix4="id4" 243 | fake_prefix__attr4 = ItemElementAttribute(ns_prefix="prefix4", ns_uri="id4") 244 | 245 | # attribute "attr5" with default namespace xmlns="id5" 246 | attr5 = ItemElementAttribute(ns_uri="id5") 247 | 248 | class MyXMLItem(RssItem): 249 | # element <elem1> without namespace 250 | elem1 = Element0() 251 | 252 | # element <elem_prefix2:elem2> with namespace xmlns:elem_prefix2="id2e" 253 | elem2 = Element0(ns_prefix="elem_prefix2", ns_uri="id2e") 254 | 255 | # element <elem_prefix3:elem3> with namespace xmlns:elem_prefix3="id3e" 256 | elem_prefix3__elem3 = Element1(ns_uri="id3e") 257 | 258 | # yet another element <elem_prefix4:elem3> with namespace xmlns:elem_prefix4="id4e" 259 | # (does not conflict with previous one) 260 | fake_prefix__elem3 = Element0(ns_prefix="elem_prefix4", ns_uri="id4e") 261 | 262 | # element <elem5> with default namespace xmlns="id5e" 263 | elem5 = Element0(ns_uri="id5e") 264 | 265 | Access to elements and its attributes is the same as with simple items: 266 | 267 | .. 
code:: python 268 | 269 | item = MyXMLItem() 270 | item.title = 'Some title' 271 | item.elem1.attr0 = 'Required content value' 272 | item.elem1 = 'Another way to set content value' 273 | item.elem1.attr1 = 'Some attribute value' 274 | item.elem_prefix3__elem3.prefix3__attr3 = 'Yet another attribute value' 275 | item.elem_prefix3__elem3.fake_prefix__attr4 = '' # non-None value is interpreted as assigned 276 | item.fake_prefix__elem3.attr1 = 42 277 | 278 | 279 | Several optional settings are allowed for namespaced items: 280 | 281 | FEED_NAMESPACES 282 | list of tuples :code:`[(prefix, URI), ...]` or dictionary :code:`{prefix: URI, ...}` of namespaces 283 | that must be defined in the root XML element 284 | 285 | FEED_ITEM_CLASS or FEED_ITEM_CLS 286 | main class of feed items (class object :code:`MyXMLItem` or path to class :code:`"path.to.MyXMLItem"`). 287 | **Default value**: :code:`RssItem`. 288 | It's used in order to extract all possible namespaces 289 | that will be declared in the root XML element. 290 | 291 | Feed items do **NOT** have to be instances of this class or its subclass. 292 | 293 | If these settings are not defined or only part of namespaces are defined 294 | then other used namespaces will be declared either in the :code:`<item>` element 295 | or in its subelements when these namespaces are not unique. 296 | Each :code:`<item>` element and its sublements always contains 297 | only namespace declarations of non-:code:`None` attributes (including ones that are interpreted as element content). 298 | 299 | 300 | Scrapy Project Examples 301 | ======================= 302 | 303 | `Examples directory <https://github.com/woxcab/scrapy_rss/blob/master/examples>`_ contains 304 | several Scrapy projects with the scrapy_rss usage demonstration. It crawls 305 | `this website <https://woxcab.github.io/scrapy_rss/>`_ whose source code is 306 | `here <https://github.com/woxcab/scrapy_rss/blob/master/examples/website>`_. 
307 | 308 | Just go to the Scrapy project directory and run commands 309 | 310 | .. code:: bash 311 | 312 | scrapy crawl first_spider 313 | scrapy crawl second_spider 314 | 315 | Thereafter `feed.rss` and `feed2.rss` files will be created in the same directory. 316 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | if_not_found: success 6 | patch: 7 | default: 8 | if_not_found: success 9 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | x-base: &base 4 | volumes: 5 | - .:/home/testuser/scrapy_rss 6 | user: "$USERID:$GROUPID" 7 | environment: 8 | - TOX_PARALLEL_NO_SPINNER=1 9 | 10 | x-build: &default-build 11 | context: . 12 | args: 13 | UNAME: testuser 14 | USERID: "$USERID" 15 | GROUPID: "$GROUPID" 16 | WORKDIR: "/home/testuser/scrapy_rss" 17 | 18 | services: 19 | py27: 20 | <<: *base 21 | build: 22 | <<: *default-build 23 | target: py27 24 | py33: 25 | <<: *base 26 | build: 27 | <<: *default-build 28 | target: py33 29 | py34: 30 | <<: *base 31 | build: 32 | <<: *default-build 33 | target: py34 34 | py35: 35 | <<: *base 36 | build: 37 | <<: *default-build 38 | target: py35 39 | py36: 40 | <<: *base 41 | build: 42 | <<: *default-build 43 | target: py36 44 | py3: 45 | <<: *base 46 | build: 47 | <<: *default-build 48 | target: py3 49 | py314: 50 | <<: *base 51 | build: 52 | <<: *default-build 53 | target: py314 54 | 55 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | */*.rss 2 | -------------------------------------------------------------------------------- 
/examples/minimal_scrapy_project/project/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/woxcab/scrapy_rss/93bf446fcaf4b334364f4e9d2175ce555f76dea7/examples/minimal_scrapy_project/project/__init__.py -------------------------------------------------------------------------------- /examples/minimal_scrapy_project/project/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | BOT_NAME = 'project' 4 | 5 | SPIDER_MODULES = ['project.spiders'] 6 | 7 | ITEM_PIPELINES = { 8 | 'scrapy_rss.pipelines.RssExportPipeline': 950, 9 | } 10 | 11 | FEED_FILE = 'feed.rss' 12 | FEED_TITLE = 'Shop categories' 13 | FEED_LINK = 'http://example.com/rss' 14 | FEED_DESCRIPTION = 'List of shop categories' 15 | -------------------------------------------------------------------------------- /examples/minimal_scrapy_project/project/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
5 | -------------------------------------------------------------------------------- /examples/minimal_scrapy_project/project/spiders/first_spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | 4 | from scrapy_rss import RssItem 5 | 6 | 7 | class SomeSpider(scrapy.Spider): 8 | name = 'first_spider' 9 | start_urls = ['https://woxcab.github.io/scrapy_rss/'] 10 | 11 | def parse(self, response): 12 | for category_name in response.css('.list-group-item ::text'): 13 | item = RssItem() 14 | item.title = category_name.extract() 15 | yield item 16 | -------------------------------------------------------------------------------- /examples/minimal_scrapy_project/project/spiders/second_spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | 4 | from scrapy_rss import RssItem 5 | 6 | 7 | class SomeSpider(scrapy.Spider): 8 | name = 'second_spider' 9 | start_urls = ['https://woxcab.github.io/scrapy_rss/'] 10 | custom_settings = { 11 | 'FEED_TITLE': 'New shop categories', 12 | 'FEED_FILE': 'feed2.rss' 13 | } 14 | 15 | def parse(self, response): 16 | for category_name in response.css('.list-group-item ::text'): 17 | item = RssItem() 18 | item.title = category_name.extract() 19 | yield item 20 | -------------------------------------------------------------------------------- /examples/minimal_scrapy_project/scrapy.cfg: -------------------------------------------------------------------------------- 1 | 2 | [settings] 3 | default = project.settings 4 | 5 | [deploy] 6 | project = project 7 | -------------------------------------------------------------------------------- /examples/scrapy_project/project/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/woxcab/scrapy_rss/93bf446fcaf4b334364f4e9d2175ce555f76dea7/examples/scrapy_project/project/__init__.py -------------------------------------------------------------------------------- /examples/scrapy_project/project/exporters.py: -------------------------------------------------------------------------------- 1 | 2 | from scrapy_rss.exporters import RssItemExporter 3 | 4 | 5 | class CustomRssItemExporter(RssItemExporter): 6 | def __init__(self, *args, **kwargs): 7 | kwargs['generator'] = kwargs.get('generator', 'Special generator') 8 | kwargs['language'] = kwargs.get('language', 'en-us') 9 | super(CustomRssItemExporter, self).__init__(*args, **kwargs) 10 | 11 | 12 | -------------------------------------------------------------------------------- /examples/scrapy_project/project/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import scrapy 4 | from scrapy_rss import RssedItem 5 | 6 | 7 | class ShopItem(RssedItem): 8 | name = scrapy.Field() 9 | rating = scrapy.Field() 10 | price = scrapy.Field() 11 | reviews = scrapy.Field() 12 | review_dates = scrapy.Field() 13 | 14 | -------------------------------------------------------------------------------- /examples/scrapy_project/project/pipelines.py: -------------------------------------------------------------------------------- 1 | 2 | import scrapy_rss 3 | import dateparser 4 | 5 | 6 | class FillPipeline: 7 | def process_item(self, item, spider): 8 | if isinstance(item, scrapy_rss.RssedItem): 9 | item.rss.title = '{} [{}] [{}] [{}]'.format(item['name'], item['rating'], 10 | item['reviews'], item['price']) 11 | if item['review_dates']: 12 | item.rss.pubDate = dateparser.parse(item['review_dates'][0], 13 | settings={'RETURN_AS_TIMEZONE_AWARE': True}) 14 | return item -------------------------------------------------------------------------------- /examples/scrapy_project/project/settings.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | BOT_NAME = 'project' 4 | 5 | SPIDER_MODULES = ['project.spiders'] 6 | 7 | ITEM_PIPELINES = { 8 | 'project.pipelines.FillPipeline': 500, 9 | 'scrapy_rss.pipelines.RssExportPipeline': 950, 10 | } 11 | 12 | FEED_FILE = 'feed.rss' 13 | FEED_TITLE = 'Shop items' 14 | FEED_LINK = 'http://example.com/rss' 15 | FEED_DESCRIPTION = 'List of shop items' 16 | 17 | FEED_EXPORTER = 'project.exporters.CustomRssItemExporter' 18 | -------------------------------------------------------------------------------- /examples/scrapy_project/project/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /examples/scrapy_project/project/spiders/first_spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from scrapy.spiders import CrawlSpider, Rule 4 | from scrapy.linkextractor import LinkExtractor 5 | from project.items import ShopItem 6 | 7 | 8 | class ShopSpider(CrawlSpider): 9 | name = 'first_spider' 10 | allowed_domains = ['woxcab.github.io'] 11 | start_urls = ['https://woxcab.github.io/scrapy_rss/'] 12 | 13 | rules = ( 14 | Rule(LinkExtractor(allow=(r'item\d+\.html$',)), 15 | callback='parse_item'), 16 | Rule(LinkExtractor()), 17 | ) 18 | 19 | def parse_item(self, response): 20 | item = ShopItem() 21 | item['name'] = response.css('h4 a ::text').extract_first() 22 | item['price'] = response.css('h4.pull-right ::text').extract_first() 23 | item.rss.description = ' '.join(p.strip() for p in response.css('.caption-full p ::text').extract()) 24 | item['rating'] = response.css('.ratings 
p:nth-child(2) ::text').extract()[-1].strip() 25 | item['reviews'] = response.css('.ratings p:nth-child(1) ::text').extract_first() 26 | item['review_dates'] = response.css('.col-md-12 span ::text').extract() 27 | item.rss.author = 'Shop' 28 | item.rss.guid.isPermaLink = True 29 | item.rss.guid = response.url 30 | item.rss.link = item.rss.comments = response.url 31 | item.rss.category = response.css('.list-group-item .active ::text') 32 | item.rss.enclosure = {'type': 'image/png', 33 | 'url': response.xpath('//div[@class="thumbnail"]/img/@src').extract_first(), 34 | 'length': 0} 35 | yield item 36 | 37 | 38 | -------------------------------------------------------------------------------- /examples/scrapy_project/project/spiders/second_spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from scrapy.spiders import CrawlSpider, Rule 4 | from scrapy.linkextractor import LinkExtractor 5 | from project.items import ShopItem 6 | 7 | 8 | class ShopSpider(CrawlSpider): 9 | name = 'second_spider' 10 | allowed_domains = ['woxcab.github.io'] 11 | start_urls = ['https://woxcab.github.io/scrapy_rss/'] 12 | custom_settings = { 13 | 'FEED_EXPORTER': 'scrapy_rss.exporters.RssItemExporter', 14 | 'FEED_FILE': 'feed2.rss' 15 | } 16 | 17 | rules = ( 18 | Rule(LinkExtractor(allow=(r'item\d+\.html$',)), 19 | callback='parse_item'), 20 | Rule(LinkExtractor()), 21 | ) 22 | 23 | def parse_item(self, response): 24 | item = ShopItem() 25 | item['name'] = response.css('h4 a ::text').extract_first() 26 | item['price'] = response.css('h4.pull-right ::text').extract_first() 27 | item.rss.description = ' '.join(p.strip() for p in response.css('.caption-full p ::text').extract()) 28 | item['rating'] = response.css('.ratings p:nth-child(2) ::text').extract()[-1].strip() 29 | item['reviews'] = response.css('.ratings p:nth-child(1) ::text').extract_first() 30 | item['review_dates'] = response.css('.col-md-12 span 
::text').extract() 31 | item.rss.author = 'Shop' 32 | item.rss.guid = {'isPermaLink': True, 'guid': response.url} 33 | item.rss.link = item.rss.comments = response.url 34 | item.rss.category = response.css('.list-group-item .active ::text') 35 | item.rss.enclosure.type = 'image/png' 36 | item.rss.enclosure.url = response.xpath('//div[@class="thumbnail"]/img/@src').extract_first() 37 | item.rss.enclosure.length = 0 38 | yield item 39 | 40 | 41 | -------------------------------------------------------------------------------- /examples/scrapy_project/requirements.txt: -------------------------------------------------------------------------------- 1 | dateparser -------------------------------------------------------------------------------- /examples/scrapy_project/scrapy.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | default = project.settings 3 | 4 | [deploy] 5 | project = project 6 | -------------------------------------------------------------------------------- /examples/website/category1.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | 4 | <head> 5 | 6 | <meta charset="utf-8"> 7 | <meta http-equiv="X-UA-Compatible" content="IE=edge"> 8 | <meta name="viewport" content="width=device-width, initial-scale=1"> 9 | <meta name="description" content=""> 10 | <meta name="author" content=""> 11 | 12 | <title>Category 1 - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 | 108 | 109 |
110 | 111 |
112 |
113 | 114 |
115 |

$24.99

116 |

First Product 117 |

118 |

See more snippets like this online store item at Bootsnipp - http://bootsnipp.com.

119 |
120 |
121 |

3 reviews

122 |

123 | 124 | 125 | 126 | 127 | 128 |

129 |
130 |
131 |
132 | 133 |
134 |
135 | 136 |
137 |

$64.99

138 |

Second Product 139 |

140 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

141 |
142 |
143 |

2 reviews

144 |

145 | 146 | 147 | 148 | 149 | 150 |

151 |
152 |
153 |
154 | 155 |
156 |
157 | 158 |
159 |

$94.99

160 |

Fifth Product 161 |

162 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

163 |
164 |
165 |

4 reviews

166 |

167 | 168 | 169 | 170 | 171 | 172 |

173 |
174 |
175 |
176 | 177 |
178 |

Like this template? 179 |

180 |

If you like this template, then check out this tutorial on how to build a working review system for your online store!

181 | View Tutorial 182 |
183 | 184 |
185 | 186 |
187 | 188 |
189 | 190 |
191 | 192 | 193 |
194 | 195 |
196 | 197 | 198 |
199 |
200 |
201 |

Copyright © Your Website 2014

202 |
203 |
204 |
205 | 206 |
207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /examples/website/category2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Category 2 - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 | 108 | 109 |
110 | 111 |
112 |
113 | 114 |
115 |

$74.99

116 |

Third Product 117 |

118 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

119 |
120 |
121 |

0 reviews

122 |

123 | 124 | 125 | 126 | 127 | 128 |

129 |
130 |
131 |
132 | 133 |
134 |
135 | 136 |
137 |

$84.99

138 |

Fourth Product 139 |

140 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

141 |
142 |
143 |

6 reviews

144 |

145 | 146 | 147 | 148 | 149 | 150 |

151 |
152 |
153 |
154 | 155 |
156 |
157 | 158 |
159 |

$104.99

160 |

Sixth Product 161 |

162 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

163 |
164 |
165 |

2 reviews

166 |

167 | 168 | 169 | 170 | 171 | 172 |

173 |
174 |
175 |
176 | 177 |
178 |
179 | 180 |
181 |

$4.99

182 |

Seventh Product 183 |

184 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

185 |
186 |
187 |

0 reviews

188 |

189 | 190 | 191 | 192 | 193 | 194 |

195 |
196 |
197 |
198 | 199 |
200 |

Like this template? 201 |

202 |

If you like this template, then check out this tutorial on how to build a working review system for your online store!

203 | View Tutorial 204 |
205 | 206 |
207 | 208 |
209 | 210 |
211 | 212 |
213 | 214 | 215 |
216 | 217 |
218 | 219 | 220 |
221 |
222 |
223 |

Copyright © Your Website 2014

224 |
225 |
226 |
227 | 228 |
229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | -------------------------------------------------------------------------------- /examples/website/css/shop-homepage.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Start Bootstrap - Shop Homepage (http://startbootstrap.com/) 3 | * Copyright 2013-2016 Start Bootstrap 4 | * Licensed under MIT (https://github.com/BlackrockDigital/startbootstrap/blob/gh-pages/LICENSE) 5 | */ 6 | 7 | body { 8 | padding-top: 70px; /* Required padding for .navbar-fixed-top. Remove if using .navbar-static-top. Change if height of navigation changes. */ 9 | } 10 | 11 | .slide-image { 12 | width: 100%; 13 | } 14 | 15 | .carousel-holder { 16 | margin-bottom: 30px; 17 | } 18 | 19 | .carousel-control, 20 | .item { 21 | border-radius: 4px; 22 | } 23 | 24 | .caption { 25 | height: 130px; 26 | overflow: hidden; 27 | } 28 | 29 | .caption h4 { 30 | white-space: nowrap; 31 | } 32 | 33 | .thumbnail img { 34 | width: 100%; 35 | } 36 | 37 | .ratings { 38 | padding-right: 10px; 39 | padding-left: 10px; 40 | color: #d17581; 41 | } 42 | 43 | .thumbnail { 44 | padding: 0; 45 | } 46 | 47 | .thumbnail .caption-full { 48 | padding: 9px; 49 | color: #333; 50 | } 51 | 52 | footer { 53 | margin: 50px 0; 54 | } -------------------------------------------------------------------------------- /examples/website/css/shop-item.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Start Bootstrap - Shop Item (http://startbootstrap.com/) 3 | * Copyright 2013-2016 Start Bootstrap 4 | * Licensed under MIT (https://github.com/BlackrockDigital/startbootstrap/blob/gh-pages/LICENSE) 5 | */ 6 | 7 | body { 8 | padding-top: 70px; /* Required padding for .navbar-fixed-top. Remove if using .navbar-static-top. Change if height of navigation changes. 
*/ 9 | } 10 | 11 | .thumbnail img { 12 | width: 100%; 13 | } 14 | 15 | .ratings { 16 | padding-right: 10px; 17 | padding-left: 10px; 18 | color: #d17581; 19 | } 20 | 21 | .thumbnail { 22 | padding: 0; 23 | } 24 | 25 | .thumbnail .caption-full { 26 | padding: 9px; 27 | color: #333; 28 | } 29 | 30 | footer { 31 | margin: 50px 0; 32 | } -------------------------------------------------------------------------------- /examples/website/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/woxcab/scrapy_rss/93bf446fcaf4b334364f4e9d2175ce555f76dea7/examples/website/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /examples/website/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/woxcab/scrapy_rss/93bf446fcaf4b334364f4e9d2175ce555f76dea7/examples/website/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /examples/website/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/woxcab/scrapy_rss/93bf446fcaf4b334364f4e9d2175ce555f76dea7/examples/website/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /examples/website/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/woxcab/scrapy_rss/93bf446fcaf4b334364f4e9d2175ce555f76dea7/examples/website/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /examples/website/images/320x150.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/woxcab/scrapy_rss/93bf446fcaf4b334364f4e9d2175ce555f76dea7/examples/website/images/320x150.png -------------------------------------------------------------------------------- /examples/website/images/800x300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/woxcab/scrapy_rss/93bf446fcaf4b334364f4e9d2175ce555f76dea7/examples/website/images/800x300.png -------------------------------------------------------------------------------- /examples/website/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Shop Homepage - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 | 108 | 109 |
110 | 111 |
112 |
113 | 114 |
115 |

$24.99

116 |

First Product 117 |

118 |

See more snippets like this online store item at Bootsnipp - http://bootsnipp.com.

119 |
120 |
121 |

3 reviews

122 |

123 | 124 | 125 | 126 | 127 | 128 |

129 |
130 |
131 |
132 | 133 |
134 |
135 | 136 |
137 |

$64.99

138 |

Second Product 139 |

140 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

141 |
142 |
143 |

2 reviews

144 |

145 | 146 | 147 | 148 | 149 | 150 |

151 |
152 |
153 |
154 | 155 |
156 |
157 | 158 |
159 |

$74.99

160 |

Third Product 161 |

162 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

163 |
164 |
165 |

0 reviews

166 |

167 | 168 | 169 | 170 | 171 | 172 |

173 |
174 |
175 |
176 | 177 |
178 |

Like this template? 179 |

180 |

If you like this template, then check out this tutorial on how to build a working review system for your online store!

181 | View Tutorial 182 |
183 | 184 |
185 | 186 |
187 | 188 |
189 | 190 |
191 | 192 | 193 |
194 | 195 |
196 | 197 | 198 |
199 |
200 |
201 |

Copyright © Your Website 2014

202 |
203 |
204 |
205 | 206 |
207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /examples/website/item1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Shop Item 1 - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 |
79 | 80 |
81 |

$24.99

82 |

First Product 83 |

84 |

See more snippets like these online store reviews at Bootsnipp - http://bootsnipp.com.

85 |

Want to make these reviews work? Check out 86 | this building a review system tutorial 87 | over at maxoffsky.com!

88 |

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum

89 |
90 |
91 |

3 reviews

92 |

93 | 94 | 95 | 96 | 97 | 98 | 4.0 stars 99 |

100 |
101 |
102 | 103 |
104 | 105 |
106 | Leave a Review 107 |
108 | 109 |
110 | 111 |
112 |
113 | 114 | 115 | 116 | 117 | 118 | Anonymous 119 | 10 days ago 120 |

This product was great in terms of quality. I would definitely buy another!

121 |
122 |
123 | 124 |
125 | 126 |
127 |
128 | 129 | 130 | 131 | 132 | 133 | Anonymous 134 | 12 days ago 135 |

I've alredy ordered another one!

136 |
137 |
138 | 139 |
140 | 141 |
142 |
143 | 144 | 145 | 146 | 147 | 148 | Anonymous 149 | 15 days ago 150 |

I've seen some better than this, but not at this price. I definitely recommend this item.

151 |
152 |
153 | 154 |
155 | 156 |
157 | 158 |
159 | 160 |
161 | 162 | 163 |
164 | 165 |
166 | 167 | 168 |
169 |
170 |
171 |

Copyright © Your Website 2014

172 |
173 |
174 |
175 | 176 |
177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /examples/website/item2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Shop Item 1 - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 |
79 | 80 |
81 |

$64.99

82 |

Second Product 83 |

84 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

85 |

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum

86 |
87 |
88 |

2 reviews

89 |

90 | 91 | 92 | 93 | 94 | 95 | 4.0 stars 96 |

97 |
98 |
99 | 100 |
101 | 102 |
103 | Leave a Review 104 |
105 | 106 |
107 | 108 |
109 |
110 | 111 | 112 | 113 | 114 | 115 | Anonymous 116 | 10 days ago 117 |

This product was great in terms of quality. I would definitely buy another!

118 |
119 |
120 | 121 |
122 | 123 |
124 |
125 | 126 | 127 | 128 | 129 | 130 | Anonymous 131 | 15 days ago 132 |

I've seen some better than this, but not at this price. I definitely recommend this item.

133 |
134 |
135 | 136 |
137 | 138 |
139 | 140 |
141 | 142 |
143 | 144 | 145 |
146 | 147 |
148 | 149 | 150 |
151 |
152 |
153 |

Copyright © Your Website 2014

154 |
155 |
156 |
157 | 158 |
159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /examples/website/item3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Shop Item 1 - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 |
79 | 80 |
81 |

$74.99

82 |

Third Product 83 |

84 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

85 |

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum

86 |
87 |
88 |

0 reviews

89 |

90 | 91 | 92 | 93 | 94 | 95 | 4.0 stars 96 |

97 |
98 |
99 | 100 |
101 | 102 |
103 | Leave a Review 104 |
105 | 106 |
107 | 108 |
109 | 110 |
111 | 112 |
113 | 114 | 115 |
116 | 117 |
118 | 119 | 120 |
121 |
122 |
123 |

Copyright © Your Website 2014

124 |
125 |
126 |
127 | 128 |
129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /examples/website/item4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Shop Item 1 - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 |
79 | 80 |
81 |

$84.99

82 |

Fourth Product 83 |

84 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

85 |

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum

86 |
87 |
88 |

6 reviews

89 |

90 | 91 | 92 | 93 | 94 | 95 | 4.0 stars 96 |

97 |
98 |
99 | 100 |
101 | 102 |
103 | Leave a Review 104 |
105 | 106 |
107 | 108 |
109 |
110 | 111 | 112 | 113 | 114 | 115 | Anonymous 116 | 10 days ago 117 |

This product was great in terms of quality. I would definitely buy another!

118 |
119 |
120 | 121 |
122 | 123 |
124 |
125 | 126 | 127 | 128 | 129 | 130 | Anonymous 131 | 11 days ago 132 |

This product was great in terms of quality.

133 |
134 |
135 | 136 |
137 | 138 |
139 |
140 | 141 | 142 | 143 | 144 | 145 | Anonymous 146 | 11 days ago 147 |

I would definitely buy another!

148 |
149 |
150 | 151 |
152 | 153 |
154 |
155 | 156 | 157 | 158 | 159 | 160 | Anonymous 161 | 12 days ago 162 |

I've alredy ordered another one!

163 |
164 |
165 | 166 |
167 | 168 |
169 |
170 | 171 | 172 | 173 | 174 | 175 | Anonymous 176 | 15 days ago 177 |

I've seen some better than this, but not at this price. I definitely recommend this item.

178 |
179 |
180 | 181 |
182 | 183 |
184 |
185 | 186 | 187 | 188 | 189 | 190 | Anonymous 191 | 25 days ago 192 |

I definitely recommend this item.

193 |
194 |
195 |
196 | 197 |
198 | 199 |
200 | 201 |
202 | 203 | 204 |
205 | 206 |
207 | 208 | 209 |
210 |
211 |
212 |

Copyright © Your Website 2014

213 |
214 |
215 |
216 | 217 |
218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /examples/website/item5.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Shop Item 1 - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 |
79 | 80 |
81 |

$94.99

82 |

Fifth Product 83 |

84 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

85 |

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum

86 |
87 |
88 |

4 reviews

89 |

90 | 91 | 92 | 93 | 94 | 95 | 4.0 stars 96 |

97 |
98 |
99 | 100 |
101 | 102 |
103 | Leave a Review 104 |
105 | 106 |
107 | 108 |
109 |
110 | 111 | 112 | 113 | 114 | 115 | Anonymous 116 | 10 days ago 117 |

This product was great in terms of quality. I would definitely buy another!

118 |
119 |
120 | 121 |
122 | 123 |
124 |
125 | 126 | 127 | 128 | 129 | 130 | Anonymous 131 | 12 days ago 132 |

I've alredy ordered another one!

133 |
134 |
135 | 136 |
137 | 138 |
139 |
140 | 141 | 142 | 143 | 144 | 145 | Anonymous 146 | 15 days ago 147 |

I've seen some better than this, but not at this price. I definitely recommend this item.

148 |
149 |
150 | 151 |
152 | 153 |
154 |
155 | 156 | 157 | 158 | 159 | 160 | Anonymous 161 | 25 days ago 162 |

I definitely recommend this item.

163 |
164 |
165 | 166 |
167 | 168 |
169 | 170 |
171 | 172 |
173 | 174 | 175 |
176 | 177 |
178 | 179 | 180 |
181 |
182 |
183 |

Copyright © Your Website 2014

184 |
185 |
186 |
187 | 188 |
189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /examples/website/item6.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Shop Item 1 - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 |
79 | 80 |
81 |

$104.99

82 |

Sixth Product 83 |

84 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

85 |

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum

86 |
87 |
88 |

2 reviews

89 |

90 | 91 | 92 | 93 | 94 | 95 | 4.0 stars 96 |

97 |
98 |
99 | 100 |
101 | 102 |
103 | Leave a Review 104 |
105 | 106 |
107 | 108 |
109 |
110 | 111 | 112 | 113 | 114 | 115 | Anonymous 116 | 1 day ago 117 |

I would definitely buy another!

118 |
119 |
120 | 121 |
122 | 123 |
124 |
125 | 126 | 127 | 128 | 129 | 130 | Anonymous 131 | 12 days ago 132 |

I've alredy ordered another one!

133 |
134 |
135 |
136 | 137 |
138 | 139 |
140 | 141 |
142 | 143 | 144 |
145 | 146 |
147 | 148 | 149 |
150 |
151 |
152 |

Copyright © Your Website 2014

153 |
154 |
155 |
156 | 157 |
158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /examples/website/item7.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Shop Item 1 - Start Bootstrap Template 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 62 | 63 | 64 |
65 | 66 |
67 | 68 |
69 |

Shop Name

70 |
71 | Category 1 72 | Category 2 73 |
74 |
75 | 76 |
77 | 78 |
79 | 80 |
81 |

$4.99

82 |

Third Product 83 |

84 |

This is a short description. Lorem ipsum dolor sit amet, consectetur adipiscing elit.

85 |

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum

86 |
87 |
88 |

0 reviews

89 |

90 | 91 | 92 | 93 | 94 | 95 | 4.0 stars 96 |

97 |
98 |
99 | 100 |
101 | 102 |
103 | Leave a Review 104 |
105 | 106 |
107 | 108 |
109 | 110 |
111 | 112 |
113 | 114 | 115 |
116 | 117 |
118 | 119 | 120 |
121 |
122 |
123 |

Copyright © Your Website 2014

124 |
125 |
126 |
127 | 128 |
129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /logs/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore::DeprecationWarning:nose\..* 4 | ignore::PendingDeprecationWarning:nose\..* 5 | ignore:'cgi' is deprecated:DeprecationWarning:twisted\..* 6 | ignore:Python.* is no longer supported.* cryptography:UserWarning 7 | ignore::DeprecationWarning:scrapy\..* 8 | ignore:.*REQUEST_FINGERPRINTER_IMPLEMENTATION:scrapy.exceptions.ScrapyDeprecationWarning:scrapy\..* 9 | ignore:currentThread\(\) is deprecated:DeprecationWarning:coverage\..* 10 | ignore:OpenSSL\.rand is deprecated:DeprecationWarning:importlib\..* 11 | ignore:The hookimpl CovPlugin.*uses old-style configuration options:DeprecationWarning 12 | 13 | addopts = --maxfail=10 --cov=scrapy_rss --cov-report=term 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | scrapy>=1.3.1,<2.0;python_version<"3" 3 | scrapy<1.5.0;python_version=="3.3.*" 4 | scrapy>=1.1,<1.8.0;python_version=="3.4.*" 5 | scrapy>=1.1;python_version=="3.5.*" 6 | scrapy>=1.3.1;python_version>"3.5" 7 | scrapy>=1.7.1;python_version>="3.10" 8 | packaging 9 | six 10 | cryptography<2.0;python_version=="3.3.*" 11 | pyOpenSSL<17.3.0;python_version=="3.3.*" 12 | -------------------------------------------------------------------------------- /scrapy_rss/VERSION: -------------------------------------------------------------------------------- 1 | 0.3.2 2 | 
-------------------------------------------------------------------------------- /scrapy_rss/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .items import * 4 | import pkgutil 5 | 6 | 7 | __version__ = pkgutil.get_data(__package__, 'VERSION').decode('ascii').strip() 8 | -------------------------------------------------------------------------------- /scrapy_rss/elements.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from . import meta 4 | from .utils import format_rfc822 5 | 6 | 7 | class TitleElement(meta.ItemElement): 8 | title = meta.ItemElementAttribute(required=True, is_content=True) 9 | 10 | 11 | class LinkElement(meta.ItemElement): 12 | link = meta.ItemElementAttribute(required=True, is_content=True) 13 | 14 | 15 | class DescriptionElement(meta.ItemElement): 16 | description = meta.ItemElementAttribute(required=True, is_content=True) 17 | 18 | 19 | class AuthorElement(meta.ItemElement): 20 | author = meta.ItemElementAttribute(required=True, is_content=True) 21 | 22 | 23 | class CategoryElement(meta.ItemElement): 24 | category = meta.ItemElementAttribute(required=True, is_content=True) 25 | 26 | 27 | class CommentsElement(meta.ItemElement): 28 | comments = meta.ItemElementAttribute(required=True, is_content=True) 29 | 30 | 31 | class EnclosureElement(meta.ItemElement): 32 | url = meta.ItemElementAttribute(required=True) 33 | length = meta.ItemElementAttribute(required=True) 34 | type = meta.ItemElementAttribute(required=True) 35 | 36 | 37 | class GuidElement(meta.ItemElement): 38 | isPermaLink = meta.ItemElementAttribute(required=False, serializer=lambda v: str(v).lower(), value=False) 39 | guid = meta.ItemElementAttribute(required=True, is_content=True) 40 | 41 | 42 | class PubDateElement(meta.ItemElement): 43 | datetime = meta.ItemElementAttribute(required=True, serializer=format_rfc822, 
class InvalidElementValueError(ValueError):
    """Raised when a value cannot be assigned directly to an RSS element."""

    def __init__(self, elem_name, elem_cls, value):
        self.elem_name = elem_name
        self.elem_cls = elem_cls
        self.value = value

    def __str__(self):
        template = ("Could not assign value '{value}' to element '{elem_name}': "
                    "element requires attributes or value is not instance of '{elem_cls}'. "
                    "For attributes modification use properties: element.attribute_name = attribute_value. "
                    "For multiple allowed elements use list: category_element = ['cat1', 'cat2', 'cat3']")
        return template.format(value=self.value, elem_name=self.elem_name, elem_cls=self.elem_cls)


class InvalidRssItemError(ValueError):
    """Raised when an exported item is not (and does not wrap) an RssItem."""
    pass


class InvalidRssItemAttributesError(ValueError):
    """Raised when required attributes or content of an RSS element are not set."""

    def __init__(self, rss_element, required_attrs, content_arg):
        self.rss_element = rss_element
        self.required_attrs = required_attrs
        self.content_arg = content_arg

    def __str__(self):
        attrs_listing = ", ".join(str(a) for a in self.required_attrs)
        if self.content_arg:
            return ("The next required attributes of RSS element '{}' ({}) "
                    "or required content ('{}' argument) are not set"
                    .format(self.rss_element, attrs_listing, self.content_arg))
        return ("The next required attributes of RSS element '{}' are not set: {}"
                .format(self.rss_element, attrs_listing))


class NoNamespaceURIError(ValueError):
    """Raised when a namespace prefix is declared without a namespace URI."""
    pass
# --- RssItemExporter.__init__ ---
def __init__(self, file, channel_title, channel_link, channel_description,
             namespaces=None, item_cls=None,
             language=None, copyright=None, managing_editor=None, webmaster=None,
             pubdate=None, last_build_date=None, category=None, generator=None,
             docs=None, ttl=None,
             **kwargs):
    """
    RSS parameters semantics: https://validator.w3.org/feed/docs/rss2.html

    Parameters
    ----------
    namespaces : {str or None : str} or tuple of (str or None, str) or list of (str or None, str) or None
        Predefined XML namespaces as {prefix: URI, ...} or [(prefix, URI), ...].
    item_cls : type
        Main class of RSS items (default: RssItem); must be RssItem or a subclass.

    Raises
    ------
    ValueError
        If managing_editor/webmaster lacks an e-mail, or item_cls is not an
        RssItem subclass.
    """
    # Force the scrapy XmlItemExporter to use RSS tag names.
    kwargs['root_element'] = 'rss'
    kwargs['item_element'] = 'item'
    super(RssItemExporter, self).__init__(file, **kwargs)

    self.channel_element = 'channel'
    self.channel_title = channel_title
    self.channel_link = channel_link
    self.channel_description = channel_description
    self.channel_language = language
    self.channel_copyright = copyright

    # Both people fields must contain at least an e-mail address.
    for field_name, field_value in (('managing_editor', managing_editor),
                                    ('webmaster', webmaster)):
        if field_value and '@' not in field_value:
            raise ValueError('{} field must contain at least e-mail. Passed: {}'
                             .format(field_name, field_value))
    self.channel_managing_editor = managing_editor
    self.channel_webmaster = webmaster

    self.channel_pubdate = pubdate
    # Default lastBuildDate to "now" in the local timezone.
    if last_build_date:
        self.channel_last_build_date = last_build_date
    else:
        self.channel_last_build_date = datetime.today().replace(tzinfo=tzlocal())

    # Normalize a single category string to a one-element list.
    is_single_category = (category and isinstance(category, six.string_types)
                          and not isinstance(category, (list, set, tuple)))
    self.channel_category = [category] if is_single_category else category

    if generator is None:
        generator = 'Scrapy {}'.format(scrapy.__version__)
    self.channel_generator = generator
    self.channel_docs = docs
    self.channel_ttl = ttl

    if not item_cls:
        item_cls = RssItem
    elif not issubclass(item_cls, RssItem):
        raise ValueError('Item class must be RssItem class or its subclass')
    self._item_cls = item_cls

    if not namespaces:
        namespaces = {}
    elif isinstance(namespaces, (list, tuple)):
        namespaces = dict(namespaces)
    declared_namespaces = namespaces.items() if isinstance(namespaces, dict) else namespaces

    # Merge explicitly passed namespaces with those declared by the item class.
    # A prefix bound to two different URIs is dropped entirely and blacklisted,
    # so it will be (re)declared per-item instead of on the root element.
    self._namespaces = {}
    conflicting_prefixes = set()
    for prefix, uri in chain(declared_namespaces, item_cls().get_namespaces(False)):
        if prefix in conflicting_prefixes:
            continue
        if prefix in self._namespaces and self._namespaces[prefix] != uri:
            del self._namespaces[prefix]
            conflicting_prefixes.add(prefix)
        else:
            self._namespaces[prefix] = uri
# --- RssItemExporter.start_exporting ---
def start_exporting(self):
    """
    Write the XML prologue, declare feed-level namespace prefixes,
    open the ``<rss>`` root and emit all configured ``<channel>`` header fields.
    """
    self.xg.startDocument()
    for ns_prefix, ns_uri in self._namespaces.items():
        self.xg.startPrefixMapping(ns_prefix, ns_uri)
    root_attrs = {(None, 'version'): '2.0'}
    self.xg.startElementNS((None, self.root_element), self.root_element, root_attrs)
    self.xg.startElement(self.channel_element, {})

    # Required channel fields.
    self._export_xml_field('title', self.channel_title, 1)
    self._export_xml_field('link', self.channel_link, 1)
    self._export_xml_field('description', self.channel_description, 1)
    # Optional channel fields are emitted only when set.
    if self.channel_language:
        self._export_xml_field('language', self.channel_language, 1)
    if self.channel_copyright:
        self._export_xml_field('copyright', self.channel_copyright, 1)
    if self.channel_managing_editor:
        self._export_xml_field('managingEditor', self.channel_managing_editor, 1)
    if self.channel_webmaster:
        self._export_xml_field('webMaster', self.channel_webmaster, 1)
    if self.channel_pubdate:
        # BUG FIX: RSS 2.0 names this channel element 'pubDate' (camelCase),
        # not 'pubdate' — feed validators flag the all-lowercase form.
        self._export_xml_field('pubDate',
                               format_rfc822(self.channel_pubdate)
                               if isinstance(self.channel_pubdate, datetime)
                               else self.channel_pubdate, 1)
    self._export_xml_field('lastBuildDate',
                           format_rfc822(self.channel_last_build_date)
                           if isinstance(self.channel_last_build_date, datetime)
                           else self.channel_last_build_date, 1)
    if self.channel_category:
        for category in self.channel_category:
            self._export_xml_field('category', category, 1)
    if self.channel_generator:
        self._export_xml_field('generator', self.channel_generator, 1)
    if self.channel_docs:
        self._export_xml_field('docs', self.channel_docs, 1)
    if self.channel_ttl:
        self._export_xml_field('ttl', self.channel_ttl, 1)
# --- RssItemExporter.export_item / RssItemExporter.finish_exporting ---
def export_item(self, item):
    """
    Serialize one item as an ``<item>`` element.

    Parameters
    ----------
    item : RssItem or object with an ``rss`` attribute of type RssItem

    Raises
    ------
    InvalidRssItemError
        If the item is not an RssItem and has no RssItem ``rss`` field.
    InvalidRssItemAttributesError
        If an assigned element misses required attributes/content.
    """
    if not isinstance(item, RssItem) and not isinstance(getattr(item, 'rss', None), RssItem):
        raise InvalidRssItemError("Item must have 'rss' field of type 'RssItem'")
    if not isinstance(item, RssItem):
        item = item.rss

    # Namespaces used by this item but not already declared on the root.
    item_namespaces = set()
    if item.__class__ is not self._item_cls:
        item_namespaces = item.get_namespaces()
        item_namespaces -= set(self._namespaces.items())
        # Drop prefixes bound to several URIs within this item: they cannot be
        # declared once on <item>, so they are declared per-element below.
        ns_prefix_count = Counter(ns_prefix for ns_prefix, _ in item_namespaces)
        item_namespaces = {ns for ns in item_namespaces if ns_prefix_count[ns[0]] == 1}
    item_namespaces = dict(item_namespaces)

    for elem_ns_prefix, elem_ns_uri in item_namespaces.items():
        self.xg.startPrefixMapping(elem_ns_prefix, elem_ns_uri)
    self.xg.startElementNS((None, self.item_element), self.item_element, {})

    for elem_name, elem_descr in item.elements.items():
        elem_value = getattr(item, str(elem_name))
        if elem_value.assigned:
            # MultipleElements expands into several sibling XML elements.
            elem_values = elem_value if isinstance(elem_value, meta.MultipleElements) else (elem_value,)
            for elem_value in elem_values:
                if not elem_value.is_valid():
                    raise InvalidRssItemAttributesError(elem_name,
                                                        list(elem_value.required_attrs),
                                                        elem_value.content_arg)

                attrs = elem_value.serialize()
                # The content pseudo-attribute becomes element text, not an XML attribute.
                content = attrs.pop(elem_descr.content_arg.xml_name, None) if elem_descr.content_arg else None
                # Namespaces needed by this element that neither the root nor
                # the <item> element already declares (same prefix AND URI).
                undeclared_elem_namespaces = {ns_prefix: ns_uri
                                              for ns_prefix, ns_uri in elem_descr.get_namespaces()
                                              if (ns_prefix not in self._namespaces
                                                  or self._namespaces[ns_prefix] != ns_uri)
                                              and (ns_prefix not in item_namespaces
                                                   or item_namespaces[ns_prefix] != ns_uri)}
                if elem_name.ns_prefix:
                    elem_qname = '{}:{}'.format(elem_name.ns_prefix, elem_name.name)
                else:
                    elem_qname = elem_name.name
                for ns_prefix, ns_uri in undeclared_elem_namespaces.items():
                    self.xg.startPrefixMapping(ns_prefix, ns_uri)
                self.xg.startElementNS(elem_name.xml_name, elem_qname, attrs)
                if content:  # NOTE(review): empty-string content is skipped as falsy
                    self.xg.characters(content)
                self.xg.endElementNS(elem_name.xml_name, elem_qname)
                for ns_prefix, ns_uri in undeclared_elem_namespaces.items():
                    self.xg.endPrefixMapping(ns_prefix)

    self.xg.endElementNS((None, self.item_element), self.item_element)
    for elem_ns_prefix in item_namespaces:
        self.xg.endPrefixMapping(elem_ns_prefix)


def finish_exporting(self):
    """Close the ``<channel>`` and ``<rss>`` elements and end the document."""
    self.xg.endElement(self.channel_element)
    self.xg.endElementNS((None, self.root_element), self.root_element)
    # BUG FIX: the original called startPrefixMapping(ns_prefix, ns_uri) here,
    # re-opening the mappings instead of closing the ones opened in
    # start_exporting. endPrefixMapping takes only the prefix.
    for ns_prefix in self._namespaces:
        self.xg.endPrefixMapping(ns_prefix)
    self.xg.endDocument()
class ItemElementAttribute(BaseNSComponent):
    """A single attribute (or the text content) of an RSS element."""

    def __init__(self, value=None, serializer=lambda x: str(x),
                 required=False, is_content=False, **kwargs):
        """
        Construct attribute of an element.

        Parameters
        ----------
        value : Any
            Default attribute value; None means unassigned.
        serializer : callable
            Converts the attribute value to a string for the XML document.
        required : bool
            Whether the attribute is required.
        is_content : bool
            Whether the "attribute" is the element's text content.

        Raises
        ------
        ValueError
            If ``is_content`` is combined with a namespace URI.
        """
        super(ItemElementAttribute, self).__init__(**kwargs)
        if is_content and self.ns_uri:
            raise ValueError("Content cannot have namespace")
        self.__required = required
        self.__is_content = is_content
        self.serializer = serializer
        self.value = value

    @property
    def required(self):
        """bool: whether the attribute is required."""
        return self.__required

    @property
    def is_content(self):
        """bool: whether the attribute is an element content."""
        return self.__is_content

    def __repr__(self):
        # Splice the parent's "(...)" argument list into this repr.
        parent_match = re.match(r'^[^(]+\((.*?)\)$', super(ItemElementAttribute, self).__repr__())
        inherited_args = ", " + parent_match.group(1) if parent_match else ''
        return "{}(value={!r}, serializer={!r}, required={!r}, is_content={!r}{})"\
            .format(self.__class__.__name__, self.value, self.serializer,
                    self.__required, self.__is_content, inherited_args)
class ItemElementMeta(type):
    """
    Metaclass of RSS elements: collects ItemElementAttribute class attributes,
    installs per-attribute properties and derives required/content metadata.
    """

    def __new__(mcs, cls_name, cls_bases, cls_attrs):
        # Collect declared attributes, keyed by their namespaced name wrapper.
        elem_attrs = {NSComponentName(attr_name, ns_prefix=attr_descr.ns_prefix, ns_uri=attr_descr.ns_uri):
                          attr_descr for attr_name, attr_descr in cls_attrs.items()
                      if isinstance(attr_descr, ItemElementAttribute)}
        # Propagate a prefix encoded in the attribute name (ns__name) to the descriptor.
        for attr_name, attr in elem_attrs.items():
            if not attr.ns_prefix:
                attr.ns_prefix = attr_name.ns_prefix
        # At most one attribute may represent the element's text content.
        if sum(attr.is_content for attr in elem_attrs.values()) > 1:
            raise ValueError("More than one attributes that's interpreted as content in the element '{}' specification"
                             .format(cls_name))
        cls_attrs['_attrs'] = elem_attrs

        # Store each descriptor under its private name ('__name'); set via the
        # class dict, so Python's compile-time name mangling does not apply.
        cls_attrs.update({attr_name.priv_name: attr_descr
                          for attr_name, attr_descr in elem_attrs.items()})
        # Public names become properties delegating to the descriptor's value.
        cls_attrs.update({attr_name.pub_name:
                              property(mcs.build_attr_getter(attr_name),
                                       mcs.build_attr_setter(attr_name))
                          for attr_name, attr_descr in elem_attrs.items()})

        return super(ItemElementMeta, mcs).__new__(mcs, cls_name, cls_bases, cls_attrs)

    def __init__(cls, cls_name, cls_bases, cls_attrs):
        # Class-level defaults; instances shadow them via properties below.
        cls._assigned = False
        cls.assigned = property(lambda self: self._assigned)
        cls.attrs = property(lambda self: self._attrs)  # Attributes

        # Required XML attributes (content is validated separately).
        cls._required_attrs = {attr_name
                               for attr_name, attr_descr in cls._attrs.items()
                               if attr_descr.required and not attr_descr.is_content}
        cls.required_attrs = property(lambda self: self._required_attrs)

        # Name of the attribute that holds the element's text content, if any.
        cls._content_arg = None
        for attr_name, attr_descr in cls._attrs.items():
            if attr_descr.is_content:
                cls._content_arg = attr_name
                break
        cls.content_arg = property(lambda self: self._content_arg)

        # Serialize all non-None attribute values, keyed by SAX (uri, name) pairs.
        cls.serialize = lambda self: {attr_name.xml_name: attr.serializer(attr.value)
                                      for attr_name in self.attrs
                                      for attr in (getattr(self, attr_name.priv_name),)
                                      if attr.value is not None}

        super(ItemElementMeta, cls).__init__(cls_name, cls_bases, cls_attrs)

    @staticmethod
    def build_attr_getter(name):
        """
        Build attribute getter.

        Parameters
        ----------
        name : NSComponentName
            an attribute name
        """
        return lambda self: getattr(self, name.priv_name).value

    @staticmethod
    def build_attr_setter(name):
        """
        Build attribute setter; any assignment marks the element as assigned.

        Parameters
        ----------
        name : NSComponentName
            an attribute name
        """
        def setter(self, value):
            attr = getattr(self, name.priv_name)
            attr.value = value
            self._assigned = True
        return setter
@six.add_metaclass(ItemElementMeta)
class ItemElement(BaseNSComponent):
    """
    Base class for elements

    Attributes
    ----------
    attrs : {NSComponentName : ItemElementAttribute}
        All attributes of the element
    required_attrs : set of NSComponentName
        Required element attributes
    content_arg : NSComponentName or None
        Name of an attribute that's interpreted as the element content
    assigned : bool
        Whether a non-None value is assigned to any attribute of the element

    Methods
    -------
    serialize() : { (str or None, str) : str }
        Convert values of element attributes to a strings.
        The dictionary key is a tuple (namespace_uri, attribute_name) for SAX handlers
    """

    def __init__(self, *args, **kwargs):
        # A single positional argument is allowed only as the element's content.
        if not self.content_arg and args:
            raise ValueError("Element of type '{}' does not support unnamed arguments (no content)"
                             .format(self.__class__.__name__))
        if len(args) > 1:
            raise ValueError("Constructor of class '{}' supports only single unnamed argument "
                             "that is interpreted as content of element".format(self.__class__.__name__))

        if args and str(self.content_arg) not in kwargs:
            kwargs[str(self.content_arg)] = args[0]

        # Deep-copy the class-level attribute descriptors so each instance
        # owns independent attribute state (class attrs are shared defaults).
        new_attrs = {}
        for attr_name in self._attrs:
            new_attr = deepcopy(getattr(self, attr_name.priv_name))
            setattr(self, attr_name.priv_name, new_attr)
            new_attrs[attr_name] = new_attr
        self._attrs = new_attrs

        # Consume keyword arguments that match declared attributes.
        for attr_name in self._attrs:
            attr_name = str(attr_name)
            if attr_name in kwargs:
                setattr(self, attr_name, kwargs[attr_name])
                del kwargs[attr_name]

        # Remaining kwargs go to BaseNSComponent (ns_prefix/ns_uri); anything
        # else triggers a TypeError that is rewrapped with a helpful message.
        try:
            super(ItemElement, self).__init__(**kwargs)
        except TypeError:
            raise ValueError("Passed arguments {}. "
                             "But constructor of class '{}' supports only the next named arguments: {}"
                             .format(list(kwargs.keys()), self.__class__.__name__,
                                     [str(a) for a in self.attrs]))

    def __repr__(self):
        # Merge this element's attributes with the parent's repr arguments.
        s_match = re.match(r'^[^(]+\((.*?)\)$', super(ItemElement, self).__repr__())
        s_repr = s_match.group(1) if s_match else ''
        attrs_repr = ", ".join("{}={!r}".format(attr_name, attr)
                               for attr_name, attr in self.attrs.items())
        return "{}({})".format(self.__class__.__name__, ", ".join(filter(None, [attrs_repr, s_repr])))

    def is_valid(self):
        """
        Check if the element has valid attributes' values.

        An unassigned element is always valid; an assigned one must have all
        required attributes set and, when content is required, the content too.
        (Note the operator precedence: `not assigned or (A and B)`.)
        """
        return (not self._assigned
                or all(getattr(self, str(attr_name)) is not None for attr_name in self.required_attrs)
                and (not self.content_arg or not self.attrs[self.content_arg].required
                     or getattr(self, str(self.content_arg)) is not None))

    def get_namespaces(self, assigned_only=True):
        # Own namespace plus namespaces of (optionally only assigned) attributes.
        namespaces = super(ItemElement, self).get_namespaces()
        for attr in self.attrs.values():
            if not assigned_only or attr.value is not None:
                namespaces.update(attr.get_namespaces(assigned_only))
        return namespaces
class MultipleElements(ItemElement):
    """
    Represents elements of the same base class
    (e.g. several <category> elements within one RSS item).
    """

    def __init__(self, base_element_cls, **kwargs):
        if not isinstance(base_element_cls, ItemElementMeta):
            raise TypeError("Invalid type of elements class: {}".format(base_element_cls))
        self.base_element_cls = base_element_cls
        super(MultipleElements, self).__init__(**kwargs)
        self.elements = []
        # Kept to construct new elements from plain values/dicts in _check_value.
        self._kwargs = kwargs
        self._content_arg = base_element_cls._content_arg

        # A container has no single serialization; the exporter iterates instead.
        def serializer():
            raise NotImplementedError('Class MultipleElements does not support serialization')
        self.serialize = serializer

    def _check_value(self, value):
        # Coerce `value` into an instance of base_element_cls, propagating this
        # container's namespace prefix/URI to the new or existing element.
        if not self._kwargs.get("ns_prefix") and self.ns_prefix:
            self._kwargs["ns_prefix"] = self.ns_prefix
        if isinstance(value, self.base_element_cls):
            if self._kwargs.get("ns_uri") and not value.ns_uri:
                value.ns_uri = self._kwargs["ns_uri"]
            if not value.ns_prefix and self.ns_prefix:
                value.ns_prefix = self.ns_prefix
            return value
        if isinstance(value, dict):
            # Dict is interpreted as attribute values for a new element.
            kwargs = self._kwargs.copy()
            kwargs.update(value)
            elem = self.base_element_cls(**kwargs)
            return elem
        # Anything else is treated as the element's content.
        return self.base_element_cls(value, **self._kwargs)

    def append(self, elem):
        """
        Append new element

        Parameters
        ----------
        elem : ItemElement or { str : Any }
            New element as element instance or dictionary of attributes' values
        """
        self.elements.append(self._check_value(elem))
        self._assigned = True

    def extend(self, iterable):
        """
        Add multiple elements

        Parameters
        ----------
        iterable : iterable of (ItemElement or { str : Any })
            Iterable of elements as element instances or dictionaries of attributes' values
        """
        for elem in iterable:
            self.append(elem)

    def add(self, value):
        """
        Add element(s) by value

        Parameters
        ----------
        value : ItemElement or { str : Any } or iterable of (ItemElement or { str : Any })
            New element(s) as element instance(s) or dictionary(-ies) of attributes' values
        """
        if isinstance(value, list):
            self.extend(value)
        else:
            self.append(value)

    def clear(self):
        """
        Remove all elements
        """
        del self.elements[:]
        self._assigned = False

    def pop(self, index=-1):
        """
        Remove single element

        Parameters
        ----------
        index : int
            Element index (last by default)
        """
        elem = self.elements.pop(index)
        if not self.elements:
            self._assigned = False
        return elem

    def __delitem__(self, index):
        self.elements.__delitem__(index)
        if not self.elements:
            self._assigned = False

    def __getitem__(self, index):
        return self.elements[index]

    def __iter__(self):
        return iter(self.elements)

    def __len__(self):
        return len(self.elements)

    def __setitem__(self, index, elem):
        if not isinstance(elem, self.base_element_cls):
            raise TypeError("Elements must have type '{}' or descendant type, not '{}'"
                            .format(self.base_element_cls, elem.__class__))
        self.elements[index] = elem

    def __getattr__(self, name):
        # Delegate attribute access to the single contained element; only
        # reached for names not found normally. Guard base_element_cls to
        # avoid infinite recursion before __init__ sets it.
        if name == 'base_element_cls':
            raise AttributeError("'{}' object has no attribute '{}'".format(self.__class__.__name__, name))
        if name not in {attr.pub_name for attr in self.base_element_cls._attrs}:
            raise AttributeError("Elements of type '{}' does not have '{}' attribute"
                                 .format(self.base_element_cls, name))
        if not self.elements:
            raise AttributeError("Instances of '{}' have not been assigned"
                                 .format(self.base_element_cls.__name__))
        if len(self.elements) > 1:
            raise AttributeError("Cannot get attribute: more than one elements have been assigned. "
                                 "Choose element and get its' attribute.")
        return getattr(self.elements[0], name)

    def __setattr__(self, name, value):
        # Setting a base-element attribute is forwarded to the single contained
        # element; everything else (including base_element_cls) is set normally.
        if name != 'base_element_cls' and name in {attr.pub_name for attr in self.base_element_cls._attrs}:
            if len(self.elements) != 1:
                raise AttributeError("Cannot set attribute: {} elements have been assigned. "
                                     "Choose element and set its' attribute.".format(len(self.elements)))
            setattr(self.elements[0], name, value)
        else:
            return super(MultipleElements, self).__setattr__(name, value)

    def __repr__(self):
        s_match = re.match(r'^[^(]+\((.*?)\)$', super(MultipleElements, self).__repr__())
        s_repr = s_match.group(1) if s_match else ''
        base_cls_repr = "base_element_cls={!r}".format(self.base_element_cls)
        return "{}({})".format(self.__class__.__name__, ", ".join(filter(None, [base_cls_repr, s_repr])))

    def get_namespaces(self, assigned_only=True):
        # Own namespace plus namespaces of every contained element.
        namespaces = super(MultipleElements, self).get_namespaces()
        for elem in self.elements:
            namespaces.update(elem.get_namespaces(assigned_only))
        return namespaces
class ItemMeta(BaseItemMeta):
    """
    Metaclass of feed items: collects ItemElement class attributes (including
    those inherited from ItemMeta bases) and installs per-element properties.
    """

    def __new__(mcs, cls_name, cls_bases, cls_attrs):
        cls_attrs['_elements'] = {}
        # Inherit elements declared on base item classes; reversed so that
        # earlier (more specific) bases override later ones.
        for cls_base in reversed(cls_bases):
            if isinstance(cls_base, ItemMeta):
                cls_attrs['_elements'].update(cls_base._elements)
                for attr_name, attr_value in cls_base.__dict__.items():
                    if isinstance(attr_value, ItemElement):
                        cls_attrs[attr_name] = attr_value
        # Elements declared directly on this class, keyed by namespaced name.
        elements = {NSComponentName(elem_name, elem_descr.ns_prefix, elem_descr.ns_uri):
                        elem_descr for elem_name, elem_descr in cls_attrs.items()
                    if isinstance(elem_descr, ItemElement)}
        # Propagate a prefix encoded in the element name (ns__name) to the descriptor.
        for elem_name, elem in elements.items():
            if not elem.ns_prefix:
                elem.ns_prefix = elem_name.ns_prefix
        cls_attrs['_elements'].update(elements)
        cls_attrs['elements'] = property(lambda self: self._elements)
        # Descriptors live under private names; public names become properties.
        cls_attrs.update({elem_name.priv_name: elem_descriptor
                          for elem_name, elem_descriptor in elements.items()})
        cls_attrs.update({elem_name.pub_name: property(mcs.build_elem_getter(elem_name),
                                                       mcs.build_elem_setter(elem_name, elem_descr))
                          for elem_name, elem_descr in elements.items()})
        return super(ItemMeta, mcs).__new__(mcs, cls_name, cls_bases, cls_attrs)

    @staticmethod
    def build_elem_getter(elem_name):
        # Reading the public name returns the element instance itself.
        return lambda self: getattr(self, elem_name.priv_name)

    @staticmethod
    def build_elem_setter(elem_name, elem_descriptor):
        def setter(self, new_value):
            # Assignment semantics depend on the element kind and value type:
            if isinstance(elem_descriptor, MultipleElements):
                # replace the whole collection
                multi_elem = getattr(self, elem_name.priv_name)
                multi_elem.clear()
                multi_elem.add(new_value)
            elif isinstance(new_value, elem_descriptor.__class__):
                # direct element instance replacement
                setattr(self, elem_name.priv_name, new_value)
            elif isinstance(new_value, dict):
                # dict of attribute values -> construct a fresh element
                setattr(self, elem_name.priv_name, elem_descriptor.__class__(**new_value))
            elif not elem_descriptor.required_attrs and elem_descriptor.content_arg:
                # plain value -> assign as the element's content
                elem = getattr(self, elem_name.priv_name)
                setattr(elem, elem_descriptor.content_arg.pub_name, new_value)
            else:
                raise InvalidElementValueError(elem_name, elem_descriptor.__class__, new_value)
        return setter
class FeedItem(six.with_metaclass(ItemMeta, BaseItem)):
    """
    Base scrapy item with RSS elements.

    Attributes
    ----------
    elements : { NSComponentName : ItemElement }
        All elements of the item
    """
    def __init__(self, *args, **kwargs):
        super(FeedItem, self).__init__(*args, **kwargs)
        # Deep-copy class-level element descriptors so each item instance
        # owns independent element state.
        new_elements = {}
        for elem_name, elem_descr in self._elements.items():
            new_element = deepcopy(getattr(self, elem_name.priv_name))
            setattr(self, elem_name.priv_name, new_element)
            new_elements[elem_name] = new_element
        self._elements = new_elements

    def __repr__(self):
        return "{}({})".format(
            self.__class__.__name__,
            ", ".join("{}={!r}".format(elem_name, elem)
                      for elem_name, elem in self.elements.items()))

    def __setattr__(self, name, value):
        if name in self.fields:
            raise AttributeError("Use item[{!r}] = {!r} to set field value".format(name, value))
        # Skip straight to the __setattr__ above MutableMapping in the MRO so
        # non-field attributes (elements, 'rss', ...) can be set directly.
        # NOTE(review): relies on scrapy's Item inheriting MutableMapping.
        super(MutableMapping, self).__setattr__(name, value)

    def get_namespaces(self, assigned_only=True):
        """
        Get all namespaces of the elements and its attributes

        Parameters
        ----------
        assigned_only : bool
            Whether to return namespaces of assigned components only

        Returns
        -------
        set of (str or None, str or None)
            Set of pairs (namespace_prefix, namespace_uri)
        """
        namespaces = set()
        for elem in self.elements.values():
            if not assigned_only or elem.assigned:
                namespaces.update(elem.get_namespaces(assigned_only))
        return namespaces


ExtendableItem = FeedItem  # Backward compatibility
self._ns_uri = ns_uri or '' 24 | 25 | @property 26 | def ns_prefix(self): 27 | """ 28 | Get the namespace prefix 29 | 30 | Returns 31 | ------- 32 | str or None 33 | The namespace prefix 34 | """ 35 | return self._ns_prefix 36 | 37 | @ns_prefix.setter 38 | def ns_prefix(self, ns_prefix): 39 | """ 40 | Set the namespace prefix 41 | 42 | Parameters 43 | ---------- 44 | ns_prefix : str or None 45 | A new namespace prefix 46 | 47 | Raises 48 | ------ 49 | NoNamespaceURIError 50 | If component does not have namespace URI 51 | ValueError 52 | If the namespace prefix is already non-empty 53 | """ 54 | if self._ns_prefix == ns_prefix: 55 | return 56 | if not self._ns_uri: 57 | raise NoNamespaceURIError("Namespace prefix cannot be set when no namespace URI") 58 | if self._ns_prefix: 59 | raise ValueError("Namespace prefix is already non-empty") 60 | self._ns_prefix = ns_prefix 61 | 62 | @property 63 | def ns_uri(self): 64 | """ 65 | Get the namespace URI 66 | 67 | Returns 68 | ------- 69 | str or None 70 | The namespace URI 71 | """ 72 | return self._ns_uri 73 | 74 | @ns_uri.setter 75 | def ns_uri(self, ns_uri): 76 | """ 77 | Set the namespace URI 78 | 79 | Parameters 80 | ---------- 81 | ns_uri : str or None 82 | A new namespace URI 83 | 84 | Raises 85 | ------ 86 | ValueError 87 | If the namespace URI is already non-empty 88 | """ 89 | if self._ns_uri == ns_uri: 90 | return 91 | if self._ns_uri: 92 | raise ValueError("Namespace URI is already non-empty") 93 | self._ns_uri = ns_uri 94 | 95 | def __repr__(self): 96 | return "{}(ns_prefix={!r}, ns_uri={!r})"\ 97 | .format(self.__class__.__name__, self._ns_prefix, self._ns_uri) 98 | 99 | def get_namespaces(self, assigned_only=True): 100 | """ 101 | Get all namespaces of the component and its children 102 | 103 | Parameters 104 | ---------- 105 | assigned_only : bool 106 | whether return namespaces of assigned children only 107 | 108 | Returns 109 | ------- 110 | set of (str or None, str or None) 111 | Set of pairs 
class NSComponentName(BaseNSComponent):
    """
    Wrapper around a component's name with optional namespace information.
    """

    def __init__(self, name, ns_prefix=None, ns_uri=None):
        """
        Parameters
        ----------
        name : str
            the component name that can optionally contain a namespace prefix
            using delimiter __ (double underscores) such as ns__name
        ns_prefix : str or None
            a namespace prefix; takes priority over a prefix embedded in ``name``
        ns_uri : str or None
            a namespace URI
        """
        if '__' in name:
            secondary_ns_prefix, name = name.split('__', 1)
        else:
            secondary_ns_prefix = None
        super(NSComponentName, self).__init__(ns_prefix=ns_prefix or secondary_ns_prefix,
                                              ns_uri=ns_uri)
        self._name = name
        # the public name keeps the prefix only when it was embedded in `name`
        # (a prefix passed via the ns_prefix argument is not reflected here)
        if secondary_ns_prefix:
            self._public_fullname = '{}__{}'.format(secondary_ns_prefix, name)
        else:
            self._public_fullname = name
        self._private_fullname = '__{}'.format(self._public_fullname)

    @property
    def name(self):
        """
        Get main name of the component without namespace information

        Returns
        -------
        str
            main component name
        """
        return self._name

    @property
    def xml_name(self):
        """
        Get tuple **(namespace_uri, name)** for XML handlers

        Returns
        -------
        (str, str)
            component name in the namespaced SAX format
        """
        return self._ns_uri, self._name

    @property
    def pub_name(self):
        """
        Get component name in the Python public notation format such as
        **prefix__name**, or **name** if no prefix was embedded in the name

        Returns
        -------
        str
            public component name
        """
        return self._public_fullname

    @property
    def priv_name(self):
        """
        Get component name in the Python pseudo-private notation format
        such as **__prefix__name** or **__name** if no embedded prefix

        Returns
        -------
        str
            private component name
        """
        return self._private_fullname

    def __str__(self):
        return self._public_fullname

    def __key(self):
        # identity is the (namespace URI, local name) pair; prefixes are ignored
        return self._ns_uri, self._name

    def __hash__(self):
        return hash(self.__key())

    def __eq__(self, other):
        # BUGFIX: per the Python data model, __eq__ must return the
        # NotImplemented singleton for unsupported operand types instead of
        # raising NotImplementedError; raising broke comparisons such as
        # `name == "foo"` and membership tests in mixed containers.
        if isinstance(other, NSComponentName):
            return self.__key() == other.__key()
        return NotImplemented

    def __ne__(self, other):
        # Explicit __ne__ for Python 2.7 compatibility (the project supports
        # py2.7); Python 3 derives it from __eq__ automatically.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __repr__(self):
        # reuse the parent repr's argument list to avoid duplicating it here
        s_match = re.match(r'^[^(]+\((.*?)\)$', super(NSComponentName, self).__repr__())
        s_repr = ", " + s_match.group(1) if s_match else ''
        return "{}(name={!r}{})".format(self.__class__.__name__, self._name, s_repr)
CloseSpider('Cannot open file {}: {}'.format(spider.settings.get('FEED_FILE', None), e)) 31 | self.files[spider] = file 32 | feed_title = spider.settings.get('FEED_TITLE') 33 | if not feed_title: 34 | raise NotConfigured('FEED_TITLE parameter does not exist') 35 | feed_link = spider.settings.get('FEED_LINK') 36 | if not feed_link: 37 | raise NotConfigured('FEED_LINK parameter does not exist') 38 | feed_description = spider.settings.get('FEED_DESCRIPTION') 39 | if feed_description is None: 40 | raise NotConfigured('FEED_DESCRIPTION parameter does not exist') 41 | 42 | item_cls = spider.settings.get('FEED_ITEM_CLASS', spider.settings.get('FEED_ITEM_CLS', RssItem)) 43 | if isinstance(item_cls, six.string_types): 44 | item_cls = load_object(item_cls) 45 | 46 | namespaces = spider.settings.get('FEED_NAMESPACES', {}) 47 | 48 | feed_exporter = spider.settings.get('FEED_EXPORTER', RssItemExporter) 49 | if isinstance(feed_exporter, six.string_types): 50 | feed_exporter = load_object(feed_exporter) 51 | if not issubclass(feed_exporter, RssItemExporter): 52 | raise TypeError("FEED_EXPORTER must be RssItemExporter or its subclass, not '{}'".format(feed_exporter)) 53 | self.exporters[spider] = feed_exporter(file, feed_title, feed_link, feed_description, 54 | namespaces=namespaces, item_cls=item_cls) 55 | self.exporters[spider].start_exporting() 56 | 57 | def spider_closed(self, spider): 58 | self.exporters[spider].finish_exporting() 59 | file = self.files.pop(spider) 60 | file.close() 61 | 62 | def process_item(self, item, spider): 63 | self.exporters[spider].export_item(item) 64 | return item 65 | -------------------------------------------------------------------------------- /scrapy_rss/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import locale 4 | 5 | 6 | def format_rfc822(date): 7 | """ 8 | 9 | Parameters 10 | ---------- 11 | date : datetime.datetime 12 | Datetime object 13 | 14 | Returns 15 
| ------- 16 | str 17 | Stringified datetime object according to RFC 822 standard 18 | """ 19 | orig_locale = locale.getlocale(locale.LC_TIME) 20 | locale.setlocale(locale.LC_TIME, 'C') 21 | date = date.strftime('%a, %d %b %Y %H:%M:%S %z') 22 | locale.setlocale(locale.LC_TIME, orig_locale) 23 | return date 24 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_rpm] 2 | doc_files = LICENSE README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, join 2 | import sys 3 | from setuptools import setup, find_packages 4 | 5 | 6 | try: 7 | from wheel.bdist_wheel import bdist_wheel as _bdist_wheel 8 | import sys 9 | 10 | class bdist_wheel(_bdist_wheel): 11 | def finalize_options(self): 12 | _bdist_wheel.finalize_options(self) 13 | self.python_tag = 'py%i%i' % (sys.version_info.major, sys.version_info.minor) 14 | 15 | except ImportError: 16 | bdist_wheel = None 17 | 18 | 19 | with open(join(dirname(__file__), 'scrapy_rss/VERSION'), 'rt') as f: 20 | version = f.read().strip() 21 | 22 | 23 | with open(join(dirname(__file__), 'requirements.txt'), 'rt') as f: 24 | install_requires = list(map(str.strip, f)) 25 | 26 | with open(join(dirname(__file__), 'tests', 'requirements.txt'), 'rt') as f: 27 | dev_requires = list(map(str.strip, f)) 28 | 29 | with open('README.rst') as readme: 30 | setup( 31 | name='scrapy-rss', 32 | version=version, 33 | url='https://github.com/woxcab/scrapy_rss', 34 | description='RSS Tools for Scrapy Framework', 35 | long_description=readme.read(), 36 | license='BSD', 37 | packages=find_packages(exclude=('tests',)), 38 | include_package_data=True, 39 | zip_safe=False, 40 | python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*', 41 | classifiers=[ 42 | 'Framework :: 
Scrapy', 43 | 'Development Status :: 4 - Beta', 44 | 'Intended Audience :: Developers', 45 | 'License :: OSI Approved :: BSD License', 46 | 'Operating System :: OS Independent', 47 | 'Programming Language :: Python', 48 | 'Programming Language :: Python :: 2', 49 | 'Programming Language :: Python :: 2.7', 50 | 'Programming Language :: Python :: 3', 51 | 'Programming Language :: Python :: 3.3', 52 | 'Programming Language :: Python :: 3.4', 53 | 'Programming Language :: Python :: 3.5', 54 | 'Programming Language :: Python :: 3.6', 55 | 'Programming Language :: Python :: 3.7', 56 | 'Programming Language :: Python :: 3.8', 57 | 'Programming Language :: Python :: 3.9', 58 | 'Programming Language :: Python :: 3.10', 59 | 'Programming Language :: Python :: 3.11', 60 | 'Programming Language :: Python :: 3.12', 61 | 'Programming Language :: Python :: 3.13', 62 | 'Topic :: Internet :: WWW/HTTP', 63 | 'Topic :: Software Development :: Libraries :: Python Modules', 64 | ], 65 | install_requires=install_requires, 66 | extras_require={ 67 | 'testing': dev_requires 68 | }, 69 | cmdclass={'bdist_wheel': bdist_wheel}, 70 | ) 71 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /tests/expected_rss/empty_feed.rss: -------------------------------------------------------------------------------- 1 | 2 | Titlehttp://example.com/feedDescription -------------------------------------------------------------------------------- /tests/expected_rss/empty_feed_with_categories.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | Description 6 | http://example.com/feed 7 | category 1 8 | category 2 9 | 10 | -------------------------------------------------------------------------------- 
/tests/expected_rss/empty_feed_without_generator.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | Description 6 | http://example.com/feed 7 | 8 | -------------------------------------------------------------------------------- /tests/expected_rss/full_empty_feed.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | Description 6 | http://example.com/feed 7 | en-US 8 | Data 9 | m@dot.com (Manager Name) 10 | web@dot.com (Webmaster Name) 11 | Tue, 01 Feb 2000 00:10:30 12 | Tue, 01 Feb 2000 05:10:30 13 | tester 14 | some category 15 | http://example.com/rss_docs 16 | 60 17 | 18 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_default_nses.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of item with default namespaces 9 | Content value 11ё 10 | 11 | Content value 11 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_default_nses2.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of item with default namespaces 9 | Content value 11ё 10 | 11 | Content value 11 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_default_nses3.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of item with default namespaces 9 | Content value 11ё 10 | 11 | Content value 11 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- 
/tests/expected_rss/item_with_enclosure.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of item with enclosure 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_guid.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of item with guid 9 | Identifier 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_multiple_categories.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Category 2 9 | Category 1 10 | Title of item with multiple categories 11 | 12 | 13 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_non_unique_ns.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 13 | Title of item with unique namespaces 14 | 15 | 19 | 20 | yet another value3_21 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_non_unique_ns2.rss: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | Title 11 | http://example.com/feed 12 | Description 13 | 14 | Title of item with unique namespaces 15 | 16 | 20 | 21 | yet another value3_21 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_non_unique_ns3.rss: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | Title 10 | 
http://example.com/feed 11 | Description 12 | 13 | Title of item with unique namespaces 14 | 15 | 19 | 20 | yet another value3_21 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_non_unique_ns4.rss: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | Title 10 | http://example.com/feed 11 | Description 12 | 13 | Title of item with unique namespaces 2 14 | 15 | 18 | 19 | value 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_non_unique_ns5.rss: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | Title 11 | http://example.com/feed 12 | Description 13 | 14 | Title of item with unique namespaces 15 | 16 | 20 | 21 | yet another value3_21 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_same_ns_prefixes.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of item with same namespace prefixes 9 | Content value 11ё 10 | 11 | Content value 11 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_same_ns_prefixes2.rss: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | Title 8 | http://example.com/feed 9 | Description 10 | 11 | Title of item with same namespace prefixes 12 | Content value 11ё 13 | 14 | Content value 11 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_same_ns_prefixes3.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | 
Description 7 | 8 | Title of item with same namespace prefixes 9 | Content value 11ё 10 | 11 | Content value 11 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_single_category.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of item with single category 9 | Category 1 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_unicode.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of item with unicode and special characters 9 | [Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!] 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_unique_ns.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 15 | Title of item with unique namespaces 16 | 17 | 20 | 21 | value3_21 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_unique_ns2.rss: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | Title 10 | http://example.com/feed 11 | Description 12 | 15 | Title of item with unique namespaces 16 | 17 | 20 | 21 | value3_21 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /tests/expected_rss/item_with_unique_ns3.rss: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | Title 13 | http://example.com/feed 14 | Description 15 | 16 | Title of item with unique namespaces 17 | 18 | 21 | 22 | value3_21 23 | 24 | 25 | 
26 | 27 | -------------------------------------------------------------------------------- /tests/expected_rss/minimal_item.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of minimal item 9 | 10 | 11 | -------------------------------------------------------------------------------- /tests/expected_rss/minimal_item2.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Description of minimal item 9 | 10 | 11 | -------------------------------------------------------------------------------- /tests/expected_rss/simple_item.rss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Title 5 | http://example.com/feed 6 | Description 7 | 8 | Title of simple item 9 | Description of simple item 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | tox 2 | pytest 3 | pytest-xdist 4 | pytest-cov<2.11 5 | coverage<5 6 | xmlunittest 7 | parameterized 8 | enum34;python_version<"3.4" 9 | backports.tempfile;python_version<"3.2" 10 | -------------------------------------------------------------------------------- /tests/test_feed_item.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import unittest 4 | from parameterized import parameterized 5 | import scrapy 6 | import six 7 | from scrapy_rss import FeedItem, RssItem, RssedItem 8 | from tests.utils import RssTestCase 9 | 10 | 11 | class TestFeedItem(RssTestCase): 12 | class MyItem1(FeedItem): 13 | def __init__(self, **kwargs): 14 | super(self.__class__, self).__init__(**kwargs) 15 | self.rss = RssItem() 16 | 17 | class MyItem2(FeedItem): 18 | field 
= scrapy.Field() 19 | field2 = scrapy.Field() 20 | 21 | def __init__(self, **kwargs): 22 | super(self.__class__, self).__init__(**kwargs) 23 | self.rss = RssItem() 24 | 25 | class MyItem3(RssedItem): 26 | pass 27 | 28 | class MyItem4(RssedItem): 29 | field = scrapy.Field() 30 | field2 = scrapy.Field() 31 | 32 | def test_extendable_item(self): 33 | from scrapy_rss import ExtendableItem as ExtendableItem1 34 | from scrapy_rss.meta import ExtendableItem as ExtendableItem2 35 | from scrapy_rss.meta.item import ExtendableItem as ExtendableItem3 36 | 37 | self.assertIs(ExtendableItem1, FeedItem) 38 | self.assertIs(ExtendableItem2, FeedItem) 39 | self.assertIs(ExtendableItem3, FeedItem) 40 | 41 | def test_field_init(self): 42 | data = {'field': 'value1', 'field2': 2} 43 | for item_cls in (self.MyItem2, self.MyItem4): 44 | item = item_cls(**data) 45 | for key, value in data.items(): 46 | self.assertEqual(item[key], value) 47 | 48 | def test_dict_init(self): 49 | d = {'field': 'value1', 'field2': 2} 50 | item = self.MyItem4(d) 51 | for key, value in d.items(): 52 | self.assertEqual(item[key], value) 53 | 54 | @parameterized.expand([ 55 | ('bad_key', 1), 56 | ('elements', 'nothing') 57 | ]) 58 | def test_bad_dict_init(self, key, value): 59 | for item_cls in (self.MyItem1, self.MyItem2, self.MyItem3, self.MyItem4): 60 | with six.assertRaisesRegex(self, KeyError, r'does not support field:'): 61 | item_cls(**{key: value}) 62 | 63 | def test_field_setter(self): 64 | for item in (self.MyItem2(), self.MyItem4()): 65 | item['field'] = 'value' 66 | item.rss = None 67 | item.new_attr = 'OK' 68 | 69 | def test_bad_field_setter(self): 70 | for item in (self.MyItem2(), self.MyItem4()): 71 | with six.assertRaisesRegex(self, AttributeError, r'Use item\[[^\]]+\] = .*? 
to set field value', 72 | msg="Allowed assignment to Scrapy fields [{} class]" 73 | .format(item.__class__.__name__)): 74 | item.field = None 75 | with six.assertRaisesRegex(self, KeyError, r'does not support field:'): 76 | item['unknown_field'] = 'Bad' 77 | 78 | def test_field_getter(self): 79 | for item in (self.MyItem2(), self.MyItem4()): 80 | with six.assertRaisesRegex(self, AttributeError, r'Use item\[[^\]]+\] to get field value'): 81 | _ = item.field 82 | _ = item.rss 83 | 84 | item.new_attr = None 85 | _ = item.new_attr 86 | 87 | def test_uniqueness(self): 88 | for item_cls in (self.MyItem1, self.MyItem2, self.MyItem3, self.MyItem4): 89 | item11 = item_cls() 90 | item12 = item_cls() 91 | self.assertNotEqual(id(item11.rss), id(item12.rss), 92 | msg='[{} class]'.format(item_cls.__name__)) 93 | 94 | def test_inheritance(self): 95 | class Derived1(FeedItem): 96 | pass 97 | 98 | Derived1() 99 | 100 | class Derived2(FeedItem): 101 | def __init__(self, **kwargs): 102 | super(Derived2, self).__init__(**kwargs) 103 | 104 | Derived2() 105 | 106 | class Derived3(RssedItem): 107 | pass 108 | 109 | Derived3() 110 | 111 | class Derived4(RssedItem): 112 | def __init__(self, **kwargs): 113 | super(Derived4, self).__init__(**kwargs) 114 | 115 | Derived4() 116 | 117 | 118 | if __name__ == '__main__': 119 | unittest.main() 120 | -------------------------------------------------------------------------------- /tests/test_import.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import unittest 4 | 5 | 6 | class TestImport(unittest.TestCase): 7 | def test_module_level(self): 8 | from scrapy_rss import RssItem 9 | from scrapy_rss import FeedItem 10 | from scrapy_rss import ExtendableItem 11 | from scrapy_rss import RssedItem 12 | import scrapy_rss 13 | 14 | def test_elements(self): 15 | from scrapy_rss.elements import TitleElement 16 | from scrapy_rss.elements import LinkElement 17 | from scrapy_rss.elements 
import DescriptionElement 18 | from scrapy_rss.elements import AuthorElement 19 | from scrapy_rss.elements import CategoryElement 20 | from scrapy_rss.elements import CommentsElement 21 | from scrapy_rss.elements import EnclosureElement 22 | from scrapy_rss.elements import GuidElement 23 | from scrapy_rss.elements import PubDateElement 24 | from scrapy_rss.elements import SourceElement 25 | 26 | def test_exporters(self): 27 | from scrapy_rss.exporters import RssItemExporter 28 | 29 | def test_items(self): 30 | from scrapy_rss.items import RssItem 31 | from scrapy_rss.items import FeedItem 32 | from scrapy_rss.items import ExtendableItem 33 | from scrapy_rss.items import RssedItem 34 | 35 | def test_meta(self): 36 | from scrapy_rss.meta import BaseNSComponent 37 | from scrapy_rss.meta import NSComponentName 38 | from scrapy_rss.meta import ItemElementAttribute 39 | from scrapy_rss.meta import ItemElementMeta 40 | from scrapy_rss.meta import ItemElement 41 | from scrapy_rss.meta import MultipleElements 42 | from scrapy_rss.meta import ItemMeta 43 | from scrapy_rss.meta import FeedItem 44 | from scrapy_rss.meta import ExtendableItem 45 | 46 | def test_pipelines(self): 47 | from scrapy_rss.pipelines import RssExportPipeline 48 | 49 | def test_utils(self): 50 | from scrapy_rss.utils import format_rfc822 51 | 52 | 53 | if __name__ == '__main__': 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from collections import Counter 4 | import itertools 5 | 6 | from unittest.util import safe_repr 7 | import difflib 8 | import re 9 | from os.path import commonprefix 10 | import sys 11 | import pprint 12 | import six 13 | 14 | from twisted.python.failure import Failure 15 | from scrapy.pipelines import ItemPipelineManager 16 | from lxml import etree 17 | from xmlunittest import XmlTestCase 18 
| 19 | from scrapy_rss.meta import ItemElement, MultipleElements 20 | from scrapy_rss.items import RssItem 21 | 22 | try: 23 | from collections.abc import Mapping 24 | except ImportError: 25 | from collections import Mapping 26 | 27 | try: 28 | from unittest.util import _common_shorten_repr, _shorten 29 | except ImportError: 30 | _MAX_LENGTH = 80 31 | _PLACEHOLDER_LEN = 12 32 | _MIN_BEGIN_LEN = 5 33 | _MIN_END_LEN = 5 34 | _MIN_COMMON_LEN = 5 35 | _MIN_DIFF_LEN = _MAX_LENGTH - \ 36 | (_MIN_BEGIN_LEN + _PLACEHOLDER_LEN + _MIN_COMMON_LEN + 37 | _PLACEHOLDER_LEN + _MIN_END_LEN) 38 | assert _MIN_DIFF_LEN >= 0 39 | 40 | 41 | def _shorten(s, prefixlen, suffixlen): 42 | skip = len(s) - prefixlen - suffixlen 43 | if skip > _PLACEHOLDER_LEN: 44 | s = '%s[%d chars]%s' % (s[:prefixlen], skip, s[len(s) - suffixlen:]) 45 | return s 46 | 47 | 48 | def _common_shorten_repr(*args): 49 | args = tuple(map(safe_repr, args)) 50 | maxlen = max(map(len, args)) 51 | if maxlen <= _MAX_LENGTH: 52 | return args 53 | 54 | prefix = commonprefix(args) 55 | prefixlen = len(prefix) 56 | 57 | common_len = _MAX_LENGTH - \ 58 | (maxlen - prefixlen + _MIN_BEGIN_LEN + _PLACEHOLDER_LEN) 59 | if common_len > _MIN_COMMON_LEN: 60 | assert _MIN_BEGIN_LEN + _PLACEHOLDER_LEN + _MIN_COMMON_LEN + \ 61 | (maxlen - prefixlen) < _MAX_LENGTH 62 | prefix = _shorten(prefix, _MIN_BEGIN_LEN, common_len) 63 | return tuple(prefix + s[prefixlen:] for s in args) 64 | 65 | prefix = _shorten(prefix, _MIN_BEGIN_LEN, _MIN_COMMON_LEN) 66 | return tuple(prefix + _shorten(s[prefixlen:], _MIN_DIFF_LEN, _MIN_END_LEN) 67 | for s in args) 68 | 69 | 70 | def get_dict_attr(obj,attr): 71 | for obj in [obj]+obj.__class__.mro(): 72 | if attr in obj.__dict__: 73 | return obj.__dict__[attr] 74 | raise AttributeError 75 | 76 | 77 | iteritems = getattr(dict, 'iteritems', dict.items) # py2-3 compatibility 78 | 79 | 80 | class FrozenDict(Mapping): 81 | """ 82 | A simple immutable wrapper around dictionaries. 
83 | It can be used as a drop-in replacement for dictionaries where immutability is desired. 84 | """ 85 | 86 | dict_cls = dict 87 | 88 | def __init__(self, *args, **kwargs): 89 | self._dict = self.dict_cls(*args, **kwargs) 90 | self._hash = None 91 | 92 | def __getitem__(self, key): 93 | return self._dict[key] 94 | 95 | def __contains__(self, key): 96 | return key in self._dict 97 | 98 | def copy(self, **add_or_replace): 99 | return self.__class__(self, **add_or_replace) 100 | 101 | def __iter__(self): 102 | return iter(self._dict) 103 | 104 | def __len__(self): 105 | return len(self._dict) 106 | 107 | def __repr__(self): 108 | return '<%s %r>' % (self.__class__.__name__, self._dict) 109 | 110 | def __hash__(self): 111 | if self._hash is None: 112 | h = 0 113 | for key, value in iteritems(self._dict): 114 | h ^= hash((key, value)) 115 | self._hash = h 116 | return self._hash 117 | 118 | 119 | class RaisedItemPipelineManager(ItemPipelineManager): 120 | def process_item(self, item, spider): 121 | d = super(RaisedItemPipelineManager, self).process_item(item, spider) 122 | if isinstance(d.result, Failure): 123 | failure = d.result 124 | d.addErrback(lambda failure: None) # workaround for Python 2.* 125 | print(failure.getTraceback()) 126 | failure.raiseException() 127 | return d 128 | 129 | 130 | class UnorderedXmlTestCase(XmlTestCase): 131 | """ 132 | Expand XmlTestCase functionality with unordered XML equivalence testing. 
133 | """ 134 | 135 | @classmethod 136 | def _xml_to_tuple(cls, element): 137 | return (element.tag, 138 | FrozenDict(element.nsmap), 139 | FrozenDict(element.attrib), 140 | FrozenDict(Counter(t for t in element.itertext() if t.strip())), 141 | frozenset(cls._xml_to_tuple(child) for child in element.getchildren())) 142 | 143 | @staticmethod 144 | def _str_to_bytes(data): 145 | if isinstance(data, str): 146 | return data.encode(encoding='utf-8') 147 | if not isinstance(data, bytes): 148 | raise ValueError("Passing data must be string or bytes array") 149 | return data 150 | 151 | def _getAssertEqualityFunc(self, first, second): 152 | for (first_cls, second_cls) in ((first.__class__, second.__class__), 153 | (first.__class__, None), 154 | (None, second.__class__)): 155 | if (first_cls, second_cls) in self._type_equality_funcs: 156 | asserter = self._type_equality_funcs[(first_cls, second_cls)] 157 | if asserter is not None: 158 | if isinstance(asserter, str): 159 | asserter = getattr(self, asserter) 160 | return asserter 161 | return super(UnorderedXmlTestCase, self)._getAssertEqualityFunc(first, second) 162 | 163 | def assertUnorderedXmlEquivalentOutputs(self, data, expected, excepted_elements = ('lastBuildDate', 'generator')): 164 | """ 165 | Children and text subnodes of each element in XML are considered as unordered set. 166 | Therefore if two XML files has different order of same elements then these XMLs are same. 
167 | """ 168 | if not excepted_elements: 169 | excepted_elements = () 170 | if isinstance(excepted_elements, six.string_types): 171 | excepted_elements = (excepted_elements,) 172 | 173 | data = data if isinstance(data, etree._Element) \ 174 | else etree.fromstring(self._str_to_bytes(data)) 175 | for excepted_element in excepted_elements: 176 | for element in data.xpath('//{}'.format(excepted_element)): 177 | element.getparent().remove(element) 178 | data_tuple = self._xml_to_tuple(data) 179 | 180 | expected = expected if isinstance(expected, etree._Element) \ 181 | else etree.fromstring(self._str_to_bytes(expected)) 182 | for excepted_element in excepted_elements: 183 | for element in expected.xpath('//{}'.format(excepted_element)): 184 | element.getparent().remove(element) 185 | expected_tuple = self._xml_to_tuple(expected) 186 | 187 | if data_tuple != expected_tuple: 188 | self.fail( 189 | 'Feeds are not equivalent accurate within ordering ' 190 | '(taking into consideration excepted nodes {excepted_elements}):\n' 191 | 'Given: {given}\n' 192 | 'Expected: {expected}' 193 | .format(excepted_elements=excepted_elements, 194 | given=etree.tostring(data), expected=etree.tostring(expected))) 195 | 196 | def assertXmlDocument(self, data): 197 | data = self._str_to_bytes(data) 198 | return super(UnorderedXmlTestCase, self).assertXmlDocument(data) 199 | 200 | def assertXmlEquivalentOutputs(self, data, expected): 201 | data = self._str_to_bytes(data) 202 | expected = self._str_to_bytes(expected) 203 | return super(UnorderedXmlTestCase, self).assertXmlEquivalentOutputs(data, expected) 204 | 205 | def assertSequenceEqual(self, seq1, seq2, msg=None, seq_type=None): 206 | """An equality assertion for ordered sequences (like lists and tuples). 207 | 208 | For the purposes of this function, a valid ordered sequence type is one 209 | which can be indexed, has a length, and has an equality operator. 210 | 211 | Args: 212 | seq1: The first sequence to compare. 
213 | seq2: The second sequence to compare. 214 | seq_type: The expected datatype of the sequences, or None if no 215 | datatype should be enforced. 216 | msg: Optional message to use on failure instead of a list of 217 | differences. 218 | """ 219 | if seq_type is not None: 220 | seq_type_name = seq_type.__name__ 221 | if not isinstance(seq1, seq_type): 222 | raise self.failureException('First sequence is not a %s: %s' 223 | % (seq_type_name, safe_repr(seq1))) 224 | if not isinstance(seq2, seq_type): 225 | raise self.failureException('Second sequence is not a %s: %s' 226 | % (seq_type_name, safe_repr(seq2))) 227 | else: 228 | seq_type_name = "sequence" 229 | 230 | differing = None 231 | try: 232 | len1 = len(seq1) 233 | except (TypeError, NotImplementedError): 234 | differing = 'First %s has no length. Non-sequence?' % ( 235 | seq_type_name) 236 | 237 | if differing is None: 238 | try: 239 | len2 = len(seq2) 240 | except (TypeError, NotImplementedError): 241 | differing = 'Second %s has no length. Non-sequence?' 
% ( 242 | seq_type_name) 243 | 244 | if differing is None: 245 | if len1 > len2: 246 | differing = ('\nFirst %s contains %d additional ' 247 | 'elements.\n' % (seq_type_name, len1 - len2)) 248 | try: 249 | differing += ('First extra element %d:\n%s\n' % 250 | (len2, seq1[len2])) 251 | except (TypeError, IndexError, NotImplementedError): 252 | differing += ('Unable to index element %d ' 253 | 'of first %s\n' % (len2, seq_type_name)) 254 | elif len1 < len2: 255 | differing = ('\nSecond %s contains %d additional ' 256 | 'elements.\n' % (seq_type_name, len2 - len1)) 257 | try: 258 | differing += ('First extra element %d:\n%s\n' % 259 | (len1, seq2[len1])) 260 | except (TypeError, IndexError, NotImplementedError): 261 | differing += ('Unable to index element %d ' 262 | 'of second %s\n' % (len1, seq_type_name)) 263 | 264 | tmp_differing = '' 265 | for ind, (item1, item2) in enumerate(zip(seq1, seq2)): 266 | try: 267 | self.assertEqual(item1, item2) 268 | except AssertionError as e: 269 | tmp_differing += ('\nFirst differing element %d: %s\n' % 270 | (ind, e.args[0])) 271 | break 272 | if tmp_differing: 273 | differing = '%ss differ: %s != %s\n%s' % ( 274 | (seq_type_name.capitalize(),) + 275 | _common_shorten_repr(seq1, seq2) + (tmp_differing,)) 276 | else: 277 | return 278 | 279 | standardMsg = differing 280 | diffMsg = '\n' + '\n'.join( 281 | difflib.ndiff(pprint.pformat(seq1).splitlines(), 282 | pprint.pformat(seq2).splitlines())) 283 | 284 | standardMsg = self._truncateMessage(standardMsg, diffMsg) 285 | msg = self._formatMessage(msg, standardMsg) 286 | self.fail(msg) 287 | 288 | 289 | class RssTestCase(UnorderedXmlTestCase): 290 | def __init__(self, *args, **kwargs): 291 | super(RssTestCase, self).__init__(*args, **kwargs) 292 | for elem in RssItem().elements.values(): 293 | if isinstance(elem, MultipleElements): 294 | self.addTypeEqualityFunc((elem.__class__, None), self.assertMultipleRssElementsEqualsToValues) 295 | else: 296 | 
self.addTypeEqualityFunc((elem.__class__, None), self.assertRssElementEqualsToValue) 297 | 298 | def assertRssElementEqualsToValue(self, element, value, msg=None): 299 | if isinstance(value, ItemElement): 300 | raise NotImplemented 301 | self.assertEqual(getattr(element, str(element.content_arg)), value, msg) 302 | 303 | def assertMultipleRssElementsEqualsToValues(self, multiple_element, values, msg=None): 304 | if isinstance(values, ItemElement): 305 | raise NotImplemented 306 | if len(multiple_element) == 1: 307 | self.assertRssElementEqualsToValue(multiple_element, values, msg) 308 | else: 309 | self.assertSequenceEqual([getattr(elem, str(elem.content_arg)) for elem in multiple_element], values, msg) 310 | 311 | 312 | -------------------------------------------------------------------------------- /tox-in-docker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Script accepts the same arguments as tox command. 5 | This runs tox command in the specific docker containers 6 | that's identified by environment list. 7 | 8 | Script logs docker and tests standard output 9 | to the logs/docker.log and logs/pytest.log files respectively. 
10 | 11 | Examples: 12 | tox-in-docker.py --help 13 | tox-in-docker.py 14 | tox-in-docker.py --recreate 15 | tox-in-docker.py -f py310 -f py39 16 | tox-in-docker.py -f scrapy2.10.0 17 | tox-in-docker.py -e py38-scrapy260 18 | tox-in-docker.py -e py38-scrapy260,py310-scrapy290 19 | """ 20 | 21 | import os 22 | import sys 23 | import re 24 | import subprocess 25 | from collections import defaultdict 26 | from itertools import chain 27 | import tox 28 | from tox.tox_env.python.api import PY_FACTORS_RE 29 | from tox.run import setup_state 30 | 31 | 32 | class UnknownEnvlist(ValueError): 33 | pass 34 | 35 | class UnknownFactor(ValueError): 36 | pass 37 | 38 | 39 | class DuplicateOutput: 40 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 41 | 42 | def __init__(self, file): 43 | self.file = file 44 | 45 | def write(self, text): 46 | print(text, end='\r') 47 | sys.stdout.flush() 48 | self.file.write(self.ansi_escape.sub('', text)) 49 | self.file.flush() 50 | 51 | 52 | def main(docker_logfile, pytest_logfile): 53 | deprecated_pythons = {'py27', 'py33', 'py34', 'py35', 'py36'} 54 | upcoming_python = 'py314' 55 | nonparallel_pythons = {'py33', 'py34'} 56 | nonstandard_pythons = deprecated_pythons | {upcoming_python} 57 | pyfactor2container = lambda pyfactor: pyfactor if pyfactor in nonstandard_pythons else 'py3' 58 | 59 | tox_config = setup_state(sys.argv[1:]) 60 | default_tox_config = setup_state([]) 61 | default_tox_envs = set(default_tox_config.envs.iter()) 62 | argv = iter(sys.argv[1:]) 63 | filtered_argv = [] 64 | for arg in argv: 65 | if arg not in {'-e', '-f'}: 66 | filtered_argv.append(arg) 67 | else: 68 | try: 69 | next(argv) 70 | except StopIteration: 71 | pass 72 | 73 | containers = defaultdict(list) 74 | envs = set(tox_config.envs.iter()) 75 | for testenv in envs: 76 | pyfactor = testenv.split('-', 1)[0] 77 | if not PY_FACTORS_RE.match(pyfactor): 78 | raise UnknownFactor(pyfactor) 79 | containers[pyfactor2container(pyfactor)].append(testenv) 
80 | 81 | unknown_envlists = envs - default_tox_envs 82 | unknown_envlists = {e for e in unknown_envlists if not e.startswith(upcoming_python)} 83 | if unknown_envlists: 84 | raise UnknownEnvlist('Environment lists ' + ', '.join(unknown_envlists) 85 | + ' are not defined in the Tox configuration file') 86 | sysenv = os.environ.copy() 87 | sysenv["USERID"], sysenv["GROUPID"] = str(os.getuid()), str(os.getgid()) 88 | 89 | summary = [] 90 | summary_title = None 91 | congratulations_line = None 92 | failed = False 93 | return_code = 0 94 | 95 | pytest_logger = DuplicateOutput(pytest_logfile) 96 | 97 | for container, envlist in containers.items(): 98 | up = subprocess.run(['docker-compose', 'up', '--build', container], 99 | env=sysenv, 100 | stdout=docker_logfile, text=True, bufsize=1) 101 | if up.returncode: 102 | continue 103 | 104 | specialargs = [] 105 | if container in deprecated_pythons: 106 | specialargs.append('--sitepackages') 107 | if container not in nonparallel_pythons: 108 | specialargs.extend(['-p', 'auto']) 109 | with subprocess.Popen(['docker-compose', 'run', container, 'tox', 110 | *specialargs, *filtered_argv, '-e', ','.join(envlist)], 111 | env=sysenv, 112 | stdout=subprocess.PIPE, 113 | text=True, 114 | bufsize=1) as container_process: 115 | summary_reached = False 116 | while container_process.poll() is None: 117 | line = container_process.stdout.readline().lstrip() 118 | if summary_reached: 119 | if 'congratulations' in line: 120 | congratulations_line = line 121 | else: 122 | if re.search(r'(?:error|fail(?:ure|ed)?)\b', line, flags=re.I): 123 | failed = True 124 | summary.append(line) 125 | elif re.search(r'(__ summary __|\.pkg.*_exit)', line): 126 | summary_title = '___________________________________ summary ____________________________________\n' 127 | summary_reached = True 128 | if ' summary ' not in line: 129 | pytest_logger.write(line) 130 | else: 131 | pytest_logger.write(line) 132 | if container_process.returncode: 133 | return_code = 
container_process.returncode

    # Replay the captured summary at the end of the combined log so the
    # per-environment results are visible in one place.
    if summary_title:
        pytest_logger.write(summary_title)
    for summary_line in summary:
        pytest_logger.write(summary_line)
    if not failed and congratulations_line:
        pytest_logger.write(congratulations_line)

    # Tear the containers down only if any were actually brought up.
    if containers:
        subprocess.run(['docker-compose', 'down'],
                       env=sysenv,
                       stdout=docker_logfile, text=True, bufsize=1)

    # Propagate the last failing container's exit status to the caller.
    exit(return_code)


if __name__ == '__main__':
    with open(os.path.join('logs', 'docker.log'), 'wt') as docker_log, \
            open(os.path.join('logs', 'pytest.log'), 'wt') as pytest_log:
        main(docker_log, pytest_log)

--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
[tox]
# Support matrix: each line pairs Python interpreter factors with the scrapy
# releases known to work on them.
envlist =
    py33-scrapy{110,113,120,122,130,133,140}
    py34-scrapy{110,113,120,122,130,133,140,150,152,160,170,174}
    py{27,35}-scrapy{110,113,120,122,130}
    py27-scrapy{131,133,140,150,152,160,170,174,180,184}
    py{35,36,37,38,39}-scrapy{131,133,140,150,152,160,170,174,180,184,201,210,220,221,230}
    py310-scrapy{171,174,180,184,201,210,220,221,230}
    py{36,37,38,39,310,311,312}-scrapy{240,241,250,251,261,263}
    py{37,38,39,310,311,312}-scrapy{270,271,280,290}
    py38-scrapy{2.10.0,2.10.1,2.11.0,2.11.2}
    py{39,310,311,312}-scrapy{2.10.0,2.10.1,2.11.0,2.11.2,2.12.0}
    py{313}-scrapy{2.12.0}


[testenv]
# Map each scrapy factor to the pinned scrapy release it installs.
deps =
    scrapy110: scrapy==1.1.0
    scrapy113: scrapy==1.1.3
    scrapy120: scrapy==1.2.0
    scrapy122: scrapy==1.2.2
    scrapy130: scrapy==1.3.0
    scrapy131: scrapy==1.3.1
    scrapy133: scrapy==1.3.3
    scrapy140: scrapy==1.4.0
    scrapy150: scrapy==1.5.0
    scrapy152: scrapy==1.5.2
    scrapy160: scrapy==1.6.0
    scrapy170: scrapy==1.7.0
    scrapy171: scrapy==1.7.1
    scrapy174: scrapy==1.7.4
    scrapy180: scrapy==1.8.0
    scrapy184: scrapy==1.8.4
    scrapy201: scrapy==2.0.1
    scrapy210: scrapy==2.1.0
    scrapy220: scrapy==2.2.0
    scrapy221: scrapy==2.2.1
    scrapy230: scrapy==2.3.0
    scrapy240: scrapy==2.4.0
    scrapy241: scrapy==2.4.1
    scrapy250: scrapy==2.5.0
    scrapy251: scrapy==2.5.1
    scrapy261: scrapy==2.6.1
    scrapy263: scrapy==2.6.3
    scrapy270: scrapy==2.7.0
    scrapy271: scrapy==2.7.1
    scrapy280: scrapy==2.8.0
    scrapy290: scrapy==2.9.0
    scrapy2.10.0: scrapy==2.10.0
    scrapy2.10.1: scrapy==2.10.1
    scrapy2.11.0: scrapy==2.11.0
    scrapy2.11.2: scrapy==2.11.2
    scrapy2.12.0: scrapy==2.12.0

extras =
    testing
parallel_show_output=true
setenv =
    LANG=en_US.UTF-8
    LANGUAGE=en_US:en
    LC_ALL=en_US.UTF-8
    # one coverage data file per environment; merged later by [testenv:cov]
    COVERAGE_FILE={envdir}/.coverage.{envname}
allowlist_externals =
    /bin/cp
    /usr/bin/cp
commands =
    {envpython} -m pytest {posargs:tests}
    /bin/cp {env:COVERAGE_FILE} .cov/


[gh-actions]
# Mapping used by tox-gh-actions to select envs per CI interpreter.
python =
    2.7: py27
    3.5: py35
    3.6: py36
    3.7: py37
    3.8: py38
    3.9: py39
    3.10: py310
    3.11: py311
    3.12: py312
    3.13: py313

# 'coverase' = coverage-erase helper environment: wipes all coverage data.
[testenv:coverase]
deps =
    coverage<5
skip_install=true
allowlist_externals =
    /bin/bash
    /usr/bin/bash
commands =
    /bin/bash -c 'rm -rf .cov/*'
    /bin/bash -c 'rm -f "{toxworkdir}/py"*/.coverage*'
    {envpython} -m coverage erase


# Combine per-environment coverage files and render the HTML report.
[testenv:cov]
deps =
    coverage<5
skip_install=true
setenv =
    COVERAGE_FILE=.coverage
allowlist_externals =
    /bin/bash
    /usr/bin/bash
commands=
    /bin/bash -c 'cp "{toxworkdir}/py"*/.coverage* .cov/'
    /bin/bash -c '{envpython} -m coverage combine .cov/.coverage*'
    {envpython} -m coverage html