├── tests ├── __init__.py ├── image │ ├── __init__.py │ ├── test_tool.py │ ├── test_upload.py │ ├── test_check.py │ ├── conftest.py │ ├── utils.py │ ├── test_test.py │ ├── test_init.py │ ├── test_run.py │ ├── test_utils.py │ ├── test_build.py │ └── test_list.py ├── samples │ ├── deploy_egg_sample_project │ │ ├── test_project │ │ │ └── __init__.py │ │ └── setup.py │ ├── migrate-eggs.zip │ ├── migrate-eggs-no-eggs.zip │ ├── deploy_egg_sample_project.zip │ ├── deploy_egg_sample_repo.git.zip │ ├── custom-images-examples-master.zip │ └── deploy_reqs_sample_project │ │ ├── other-egg-0.2.1.zip │ │ └── inflect-0.2.5.tar.gz ├── requirements.txt ├── conftest.py ├── test_fetch_eggs.py ├── test_logout.py ├── test_end_to_end.py ├── test_deploy_reqs.py ├── utils.py ├── test_jobresource.py ├── test_cancel.py ├── test_login.py ├── test_bootstrap.py ├── test_deploy_egg.py ├── test_schedule.py └── test_migrate_eggs.py ├── freeze ├── tests │ ├── __init__.py │ ├── testproject │ │ ├── testproject │ │ │ ├── __init__.py │ │ │ ├── items.py │ │ │ ├── pipelines.py │ │ │ ├── settings.py │ │ │ └── spiders │ │ │ │ ├── __init__.py │ │ │ │ └── example.py │ │ ├── scrapinghub.yml │ │ └── scrapy.cfg │ ├── run.py │ └── fakeserver.py ├── spider-down.ico └── hooks │ ├── runtime-hooks.py │ ├── hook-scrapinghub.py │ └── hook-shub.py ├── docs ├── changes.rst ├── requirements.txt ├── _static │ └── theme_overrides.css ├── index.rst ├── quickstart.rst ├── scheduling.rst ├── deploying.rst └── Makefile ├── setup.cfg ├── shub ├── version.py ├── __init__.py ├── __main__.py ├── image │ ├── __init__.py │ ├── check.py │ ├── run │ │ ├── wrapper.py │ │ └── __init__.py │ ├── upload.py │ ├── build.py │ ├── test.py │ ├── list.py │ ├── init.py │ └── push.py ├── logout.py ├── items.py ├── requests.py ├── tool.py ├── compat.py ├── fetch_eggs.py ├── log.py ├── login.py ├── deploy_reqs.py ├── cancel.py ├── copy_eggs.py ├── migrate_eggs.py ├── exceptions.py ├── schedule.py ├── bootstrap.py └── deploy_egg.py ├── .bumpversion.cfg ├── .readthedocs.yml ├── CHANGES.rst ├── .github └── workflows │ ├── checks.yml │ ├── freeze-release-publish.yml │ └── tests.yml ├── RELEASE.md ├── LICENSE ├── tox.ini ├── README.rst ├── setup.py └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/image/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /freeze/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/changes.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../CHANGES.rst 2 | -------------------------------------------------------------------------------- /freeze/tests/testproject/testproject/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==7.2.6 2 | sphinx-rtd-theme==2.0.0 3 | -------------------------------------------------------------------------------- /tests/samples/deploy_egg_sample_project/test_project/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /freeze/spider-down.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/shub/HEAD/freeze/spider-down.ico -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | cleo 2 | flake8 3 | pipenv 4 | python-dateutil 5 | pytest 6 | pytest-cov 7 | -------------------------------------------------------------------------------- /tests/samples/migrate-eggs.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/shub/HEAD/tests/samples/migrate-eggs.zip -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | [flake8] 4 | max-line-length = 110 5 | exclude = .tox,tests,freeze,dist 6 | -------------------------------------------------------------------------------- /freeze/tests/testproject/testproject/items.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | 4 | class TestprojectItem(scrapy.Item): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/samples/migrate-eggs-no-eggs.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/shub/HEAD/tests/samples/migrate-eggs-no-eggs.zip -------------------------------------------------------------------------------- /tests/samples/deploy_egg_sample_project.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/shub/HEAD/tests/samples/deploy_egg_sample_project.zip -------------------------------------------------------------------------------- /tests/samples/deploy_egg_sample_repo.git.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/shub/HEAD/tests/samples/deploy_egg_sample_repo.git.zip -------------------------------------------------------------------------------- /tests/samples/custom-images-examples-master.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/shub/HEAD/tests/samples/custom-images-examples-master.zip -------------------------------------------------------------------------------- /freeze/tests/testproject/testproject/pipelines.py: -------------------------------------------------------------------------------- 
1 | class TestprojectPipeline: 2 | def process_item(self, item, spider): 3 | return item 4 | -------------------------------------------------------------------------------- /shub/version.py: -------------------------------------------------------------------------------- 1 | import click 2 | import shub 3 | 4 | 5 | @click.command(help="Show shub version") 6 | def cli(): 7 | click.echo(shub.__version__) 8 | -------------------------------------------------------------------------------- /freeze/hooks/runtime-hooks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | os.environ['REQUESTS_CA_BUNDLE'] = os.path.join( 5 | sys._MEIPASS, 'requests', 'cacert.pem') 6 | -------------------------------------------------------------------------------- /freeze/tests/testproject/testproject/settings.py: -------------------------------------------------------------------------------- 1 | BOT_NAME = 'testproject' 2 | SPIDER_MODULES = ['testproject.spiders'] 3 | NEWSPIDER_MODULE = 'testproject.spiders' 4 | -------------------------------------------------------------------------------- /tests/samples/deploy_reqs_sample_project/other-egg-0.2.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/shub/HEAD/tests/samples/deploy_reqs_sample_project/other-egg-0.2.1.zip -------------------------------------------------------------------------------- /tests/samples/deploy_reqs_sample_project/inflect-0.2.5.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/shub/HEAD/tests/samples/deploy_reqs_sample_project/inflect-0.2.5.tar.gz -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 2.16.0 3 | commit = True 4 | tag = True 5 | tag_name = v{new_version} 6 | 7 | [bumpversion:file:setup.py] 8 | 9 | [bumpversion:file:shub/__init__.py] 10 | -------------------------------------------------------------------------------- /freeze/hooks/hook-scrapinghub.py: -------------------------------------------------------------------------------- 1 | from PyInstaller.utils.hooks import collect_data_files 2 | 3 | # Add the data files in the scrapinghub package (aka scrapinghub.VERSION). 4 | datas = collect_data_files('scrapinghub') 5 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def tempdir(tmpdir): 8 | cwd = os.getcwd() 9 | os.chdir(str(tmpdir)) 10 | yield tmpdir 11 | os.chdir(cwd) 12 | -------------------------------------------------------------------------------- /freeze/tests/testproject/testproject/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
5 | -------------------------------------------------------------------------------- /freeze/hooks/hook-shub.py: -------------------------------------------------------------------------------- 1 | from PyInstaller.utils.hooks import collect_submodules 2 | 3 | # Add as hidden imports all submodules from shub. This is because shub 4 | # modules are loaded when it's executed. 5 | hiddenimports = collect_submodules('shub') 6 | -------------------------------------------------------------------------------- /docs/_static/theme_overrides.css: -------------------------------------------------------------------------------- 1 | /* override table width restrictions */ 2 | /* https://github.com/snide/sphinx_rtd_theme/issues/117#issuecomment-41506687 */ 3 | .wy-table-responsive table td, .wy-table-responsive table th { 4 | white-space: normal; 5 | } 6 | -------------------------------------------------------------------------------- /shub/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.16.0' 2 | 3 | 4 | # Links to documentation to use over the project sources 5 | DOCS_LINK = "https://shub.readthedocs.io/en/stable/" 6 | DEPLOY_DOCS_LINK = DOCS_LINK + "deploying.html#deploying-dependencies" 7 | CONFIG_DOCS_LINK = DOCS_LINK + "configuration.html" 8 | -------------------------------------------------------------------------------- /shub/__main__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import shub.tool 5 | 6 | 7 | prog_name = os.path.basename(sys.argv and sys.argv[0] or __file__) 8 | if prog_name == '__main__.py': 9 | # shub invoked via python -m shub 10 | prog_name = __package__ 11 | shub.tool.cli(prog_name=prog_name) 12 | -------------------------------------------------------------------------------- /freeze/tests/testproject/scrapinghub.yml: -------------------------------------------------------------------------------- 1 | projects: 2 | default: 1 3 | next: 2 4 | nested: default/3 5 | nested2: numeric/4 6 | 7 | endpoints: 8 | default: http://localhost:7999/api/ 9 | nuneric: http://127.0.0.1:7999/api/ 10 | 11 | apikeys: 12 | default: abcdabcdabcdabcdabcdabcdabcdabcd 13 | 14 | -------------------------------------------------------------------------------- /freeze/tests/testproject/testproject/spiders/example.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | 4 | class ExampleSpider(scrapy.Spider): 5 | name = "example" 6 | allowed_domains = ["example.com"] 7 | start_urls = ( 8 | 'http://www.example.com/', 9 | ) 10 | 11 | def parse(self, response): 12 | pass 13 | -------------------------------------------------------------------------------- /freeze/tests/testproject/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = testproject.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = testproject 12 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | formats: all 3 | sphinx: 4 | configuration: docs/conf.py 5 | fail_on_warning: true 6 | 7 | build: 8 | os: ubuntu-20.04 9 | tools: 10 | 
# For available versions, see: 11 | # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python 12 | python: "3.11" # Keep in sync with .github/workflows/checks.yml 13 | 14 | python: 15 | install: 16 | - requirements: docs/requirements.txt 17 | - path: . 18 | -------------------------------------------------------------------------------- /tests/image/test_tool.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from unittest import TestCase 3 | from shub.image import cli 4 | 5 | 6 | class TestToolCli(TestCase): 7 | 8 | def test_cli(self): 9 | runner = CliRunner() 10 | result = runner.invoke(cli, ['--help']) 11 | assert result.exit_code == 0 12 | assert 'Manage project based on custom Docker image' in result.output 13 | assert 'Options:' in result.output 14 | assert 'Commands:' in result.output 15 | -------------------------------------------------------------------------------- /shub/image/__init__.py: -------------------------------------------------------------------------------- 1 | import click 2 | import importlib 3 | 4 | 5 | @click.group(help="Manage project based on custom Docker image") 6 | def cli(): 7 | pass 8 | 9 | 10 | module_deps = [ 11 | "init", 12 | "build", 13 | "list", 14 | "test", 15 | "push", 16 | "deploy", 17 | "upload", 18 | "check", 19 | "run", 20 | ] 21 | 22 | for command in module_deps: 23 | module_path = "shub.image." + command 24 | command_module = importlib.import_module(module_path) 25 | cli.add_command(command_module.cli, command) 26 | -------------------------------------------------------------------------------- /shub/logout.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from shub.config import load_shub_config, GLOBAL_SCRAPINGHUB_YML_PATH 4 | from shub.utils import update_yaml_dict 5 | 6 | 7 | HELP = """ 8 | Remove the Scrapinghub API key that is saved in your global configuration 9 | file (~/.scrapinghub.yml), if any. 10 | """ 11 | 12 | SHORT_HELP = "Forget saved Scrapinghub API key" 13 | 14 | 15 | @click.command(help=HELP, short_help=SHORT_HELP) 16 | def cli(): 17 | global_conf = load_shub_config(load_local=False, load_env=False) 18 | if 'default' not in global_conf.apikeys: 19 | click.echo("You are not logged in.") 20 | return 0 21 | 22 | with update_yaml_dict(GLOBAL_SCRAPINGHUB_YML_PATH) as conf: 23 | del conf['apikeys']['default'] 24 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. shub.image documentation master file, created by 2 | sphinx-quickstart on Tue May 3 16:20:52 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to shub's documentation! 7 | ================================ 8 | 9 | ``shub`` is the Scrapinghub command line client. It allows you to deploy 10 | projects or dependencies, schedule spiders, and retrieve scraped data or logs 11 | without leaving the command line. 12 | 13 | Contents 14 | -------- 15 | 16 | .. 
toctree:: 17 | :maxdepth: 2 18 | 19 | quickstart 20 | configuration 21 | deploying 22 | scheduling 23 | deploy-custom-image 24 | custom-images-contract 25 | changes 26 | -------------------------------------------------------------------------------- /shub/image/check.py: -------------------------------------------------------------------------------- 1 | import click 2 | import requests 3 | 4 | from shub.image.utils import load_status_url 5 | 6 | SHORT_HELP = "Check a deploy task's status url saved in a temporary file." 7 | 8 | HELP = """ 9 | A command to check your release task state for asynchronous deploy mode. 10 | Does a simple GET request to Dash with an URL which it reads from a 11 | temporary file. 12 | """ 13 | 14 | 15 | @click.command(help=HELP, short_help=SHORT_HELP) 16 | @click.option("--id", type=int, help="status id to check deploy results") 17 | def cli(id): 18 | status_url = load_status_url(id) 19 | status_req = requests.get(status_url, timeout=300) 20 | status_req.raise_for_status() 21 | result = status_req.json() 22 | click.echo(f"Deploy results: {result}") 23 | -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Changes 3 | ======= 4 | 5 | 2.16.0 (2025-11-05) 6 | =================== 7 | 8 | - Drop support for Python 3.6, 3.7 & 3.8. 9 | - Add support for Python 3.13 & 3.14. 10 | - Fix ``shub image upload`` documentation. 11 | - Add support for current Poetry versions. 12 | - Fix link to get apikey in ``shub login`` command. 13 | - Modernize generated Dockerfile (``shub image init`` command). 14 | 15 | 16 | 2.15.4 (2024-02-08) 17 | =================== 18 | 19 | - Support Docker server 25+. 20 | 21 | 22 | 2.15.3 (2024-01-23) 23 | =================== 24 | 25 | - Fix ``shub image deploy`` failing on Python 3.8 and 3.9. 26 | 27 | 28 | 2.15.2 (2024-01-17) 29 | =================== 30 | 31 | - Add support for Python 3.12. 32 | 33 | - Remove remnants of Python 2 support. 34 | 35 | - Start a changelog. 
36 | -------------------------------------------------------------------------------- /.github/workflows/checks.yml: -------------------------------------------------------------------------------- 1 | name: Checks 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | checks: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | include: 17 | - python-version: 3 18 | env: 19 | TOXENV: flake8 20 | - python-version: "3.11" 21 | env: 22 | TOXENV: docs 23 | 24 | steps: 25 | - uses: actions/checkout@v2 26 | 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v5 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | - name: Run check 33 | env: ${{ matrix.env }} 34 | run: | 35 | pip install -U pip 36 | pip install -U tox 37 | tox 38 | -------------------------------------------------------------------------------- /tests/samples/deploy_egg_sample_project/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | name='test_project', 6 | version='1.2.0', 7 | packages=['test_project'], 8 | description='Test Project', 9 | author='Scrapinghub', 10 | author_email='info@scrapinghub.com', 11 | maintainer='Scrapinghub', 12 | maintainer_email='info@scrapinghub.com', 13 | license='BSD', 14 | include_package_data=True, 15 | zip_safe=False, 16 | install_requires=[], 17 | classifiers=[ 18 | 'Development Status :: 5 - Production/Stable', 19 | 'Intended Audience :: Developers', 20 | 'Natural Language :: English', 21 | 'License :: OSI Approved :: BSD License', 22 | 'Programming Language :: Python', 23 | 'Programming Language :: Python :: 2.7', 24 | 'Operating System :: OS Independent', 25 | 'Environment :: Console', 26 | 'Topic :: Internet :: WWW/HTTP', 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /tests/image/test_upload.py: -------------------------------------------------------------------------------- 1 | from unittest import mock, TestCase 2 | 3 | from click.testing import CliRunner 4 | 5 | from shub.image.upload import cli 6 | 7 | 8 | class TestUploadCli(TestCase): 9 | 10 | @mock.patch('shub.image.deploy.deploy_cmd') 11 | @mock.patch('shub.image.push.push_cmd') 12 | @mock.patch('shub.image.build.build_cmd') 13 | def test_cli(self, build, push, deploy): 14 | runner = CliRunner() 15 | result = runner.invoke( 16 | cli, ["dev", "-v", "--version", "test", 17 | "--username", "user", "--password", "pass", 18 | "--email", "mail", "--async", "--apikey", "apikey", 19 | "--skip-tests", "--no-cache", "-f", "Dockerfile", "--reauth"]) 20 | assert result.exit_code == 0 21 | build.assert_called_with('dev', 'test', True, True, (), filename='Dockerfile') 22 | push.assert_called_with( 23 | 'dev', 'test', 'user', 'pass', 'mail', "apikey", False, reauth=True, 24 | skip_tests=True) 25 | deploy.assert_called_with( 26 | 'dev', 'test', 'user', 'pass', 'mail', "apikey", False, True) 27 | -------------------------------------------------------------------------------- /tests/test_fetch_eggs.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from collections import namedtuple 3 | from unittest import mock 4 | 5 | from click.testing import CliRunner 6 | 7 | from shub import fetch_eggs 8 | from shub.exceptions import InvalidAuthException, RemoteErrorException 9 | 10 | from .utils import AssertInvokeRaisesMixin, 
mock_conf 11 | 12 | 13 | FakeResponse = namedtuple('FakeResponse', ['status_code']) 14 | 15 | 16 | @mock.patch('shub.fetch_eggs.requests', autospec=True) 17 | class FetchEggsTest(AssertInvokeRaisesMixin, unittest.TestCase): 18 | 19 | def setUp(self): 20 | self.runner = CliRunner() 21 | self.conf = mock_conf(self) 22 | 23 | def test_raises_auth_exception(self, requests_mock): 24 | fake_response = FakeResponse(403) 25 | requests_mock.get.return_value = fake_response 26 | self.assertInvokeRaises(InvalidAuthException, fetch_eggs.cli) 27 | 28 | def test_raises_exception_if_request_error(self, requests_mock): 29 | fake_response = FakeResponse(400) 30 | requests_mock.get.return_value = fake_response 31 | self.assertInvokeRaises(RemoteErrorException, fetch_eggs.cli) 32 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | Release procedure for shub 2 | ========================== 3 | 4 | The GitHub Actions build is configured to release `shub` to PyPI whenever 5 | a new tag (starting with `v`, e.g. `v2.13.0`) is committed. 6 | 7 | The steps to do a release are: 8 | 9 | 1. Install [bumpversion](https://pypi.python.org/pypi/bumpversion) 10 | 11 | 2. Make sure you're at the tip of master, and then run: 12 | 13 | bumpversion VERSION_PART 14 | 15 | In place of `VERSION_PART`, use one of `patch`, `minor` or `major`, meaning 16 | the part of the version number to be updated. 17 | 18 | This will create a new commit and tag updating the version number. 19 | 20 | 3. Push the changes and the new tag to trigger the release: 21 | 22 | git push origin master --tags 23 | 24 | 4. Once the build finishes, run `pip install shub` in a temporary virtualenv 25 | and make sure it's installing the latest version. 26 | 27 | 5. Update the release information at: 28 | 29 | https://github.com/scrapinghub/shub/releases 30 | 31 | The GitHub action will automatically create a release draft and attach the 32 | platform-specific binaries (built with the `freeze` tox environment) to it. 
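As a quick recap, a typical minor release boils down to the following (a sketch only; substitute `patch` or `major` as the version part if needed):

    pip install bumpversion
    bumpversion minor
    git push origin master --tags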
33 | -------------------------------------------------------------------------------- /tests/test_logout.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | import unittest 3 | from unittest import mock 4 | 5 | from click.testing import CliRunner 6 | 7 | from shub import config, logout 8 | 9 | 10 | @mock.patch('shub.config.GLOBAL_SCRAPINGHUB_YML_PATH', new='.scrapinghub.yml') 11 | @mock.patch('shub.config.NETRC_PATH', new='.netrc') 12 | @mock.patch('shub.logout.GLOBAL_SCRAPINGHUB_YML_PATH', new='.scrapinghub.yml') 13 | class LogoutTestCase(unittest.TestCase): 14 | 15 | def setUp(self): 16 | self.runner = CliRunner() 17 | 18 | def test_remove_key(self): 19 | GLOBAL_SH_YML = textwrap.dedent(""" 20 | apikeys: 21 | default: LOGGED_IN_KEY 22 | """) 23 | with self.runner.isolated_filesystem(): 24 | with open('.scrapinghub.yml', 'w') as f: 25 | f.write(GLOBAL_SH_YML) 26 | conf = config.load_shub_config() 27 | self.assertIn('default', conf.apikeys) 28 | self.runner.invoke(logout.cli) 29 | conf = config.load_shub_config() 30 | self.assertNotIn('default', conf.apikeys) 31 | 32 | @mock.patch('shub.logout.update_yaml_dict') 33 | def test_fail_on_not_logged_in(self, mock_uyd): 34 | with self.runner.isolated_filesystem(): 35 | self.runner.invoke(logout.cli) 36 | self.assertFalse(mock_uyd.called) 37 | -------------------------------------------------------------------------------- /shub/items.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from shub.utils import job_resource_iter, get_job 4 | 5 | 6 | HELP = """ 7 | Given a job ID, fetch items for that job from Scrapy Cloud and output them as 8 | JSON lines. 9 | 10 | A job ID consists of the Scrapinghub project ID, the numerical spider ID, and 11 | the job ID, separated by forward slashes, e.g.: 12 | 13 | shub items 12345/2/15 14 | 15 | You can also provide the Scrapinghub job URL instead: 16 | 17 | shub items https://app.zyte.com/p/12345/2/15 18 | 19 | You can omit the project ID if you have a default target defined in your 20 | scrapinghub.yml: 21 | 22 | shub items 2/15 23 | 24 | Or use any target defined in your scrapinghub.yml: 25 | 26 | shub items production/2/15 27 | 28 | If the job is still running, you can watch the items as they are being scraped 29 | by providing the -f flag: 30 | 31 | shub items -f 2/15 32 | """ 33 | 34 | SHORT_HELP = "Fetch items from Scrapy Cloud" 35 | 36 | 37 | @click.command(help=HELP, short_help=SHORT_HELP) 38 | @click.argument('job_id') 39 | @click.option('-f', '--follow', help='output new items as they are scraped', 40 | is_flag=True) 41 | @click.option('-n', '--tail', help='output last N items only', type=int) 42 | def cli(job_id, follow, tail): 43 | job = get_job(job_id) 44 | for item in job_resource_iter(job, job.items, output_json=True, 45 | follow=follow, tail=tail): 46 | click.echo(item) 47 | -------------------------------------------------------------------------------- /shub/requests.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from shub.utils import job_resource_iter, get_job 4 | 5 | 6 | HELP = """ 7 | Given a job ID, fetch requests made for that job from Scrapy Cloud and output 8 | them as JSON lines. 
9 | 10 | A job ID consists of the Scrapinghub project ID, the numerical spider ID, and 11 | the job ID, separated by forward slashes, e.g.: 12 | 13 | shub requests 12345/2/15 14 | 15 | You can also provide the Scrapinghub job URL instead: 16 | 17 | shub requests https://app.zyte.com/p/12345/2/15 18 | 19 | You can omit the project ID if you have a default target defined in your 20 | scrapinghub.yml: 21 | 22 | shub requests 2/15 23 | 24 | Or use any target defined in your scrapinghub.yml: 25 | 26 | shub requests production/2/15 27 | 28 | If the job is still running, you can watch the requests as they are being made 29 | by providing the -f flag: 30 | 31 | shub requests -f 2/15 32 | """ 33 | 34 | SHORT_HELP = "Fetch requests from Scrapy Cloud" 35 | 36 | 37 | @click.command(help=HELP, short_help=SHORT_HELP) 38 | @click.argument('job_id') 39 | @click.option('-f', '--follow', help='output new requests as they are made', 40 | is_flag=True) 41 | @click.option('-n', '--tail', help='output last N requests only', type=int) 42 | def cli(job_id, follow, tail): 43 | job = get_job(job_id) 44 | for item in job_resource_iter(job, job.requests, output_json=True, 45 | follow=follow, tail=tail): 46 | click.echo(item) 47 | -------------------------------------------------------------------------------- /tests/test_end_to_end.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from click.testing import CliRunner 3 | from shub import tool 4 | import os 5 | 6 | 7 | @unittest.skipUnless(os.getenv('USING_TOX'), 8 | 'End to end tests only run via TOX') 9 | class ShubEndToEndTests(unittest.TestCase): 10 | def setUp(self): 11 | self.runner = CliRunner() 12 | 13 | def run_subcmd(self, subcmd): 14 | return self.runner.invoke(tool.cli, [subcmd]).output 15 | 16 | def test_usage_is_displayed_if_no_arg_is_provided(self): 17 | output = self.run_subcmd('') 18 | usage_is_displayed = output.startswith('Usage:') 19 | self.assertTrue(usage_is_displayed) 20 | 21 | def test_deploy_egg_isnt_broken(self): 22 | output = self.run_subcmd('deploy-egg') 23 | error = 'Unexpected output: %s' % output 24 | self.assertTrue('specify target' in output, error) 25 | 26 | def test_deploy_reqs_isnt_broken(self): 27 | output = self.run_subcmd('deploy-reqs') 28 | error = 'Unexpected output: %s' % output 29 | self.assertTrue('specify target' in output, error) 30 | 31 | def test_deploy_isnt_broken(self): 32 | output = self.run_subcmd('deploy') 33 | error = 'Unexpected output: %s' % output 34 | self.assertTrue('Cannot find project' in output, error) 35 | 36 | def test_fetch_eggs_isnt_broken(self): 37 | output = self.run_subcmd('fetch-eggs') 38 | error = 'Unexpected output: %s' % output 39 | self.assertTrue('specify target' in output, error) 40 | -------------------------------------------------------------------------------- /tests/image/test_check.py: -------------------------------------------------------------------------------- 1 | from unittest import mock, TestCase 2 | 3 | from click.testing import CliRunner 4 | 5 | from shub import exceptions as shub_exceptions 6 | from shub.image.check import cli 7 | from shub.image import utils 8 | 9 | from .utils import FakeProjectDirectory 10 | 11 | 12 | class TestCheckCli(TestCase): 13 | 14 | @mock.patch('requests.get') 15 | def test_cli(self, mocked): 16 | # the test creates .releases file locally 17 | # this context manager cleans it in the end 18 | with FakeProjectDirectory(): 19 | runner = CliRunner() 20 | result = runner.invoke(cli, []) 21 | 
assert result.exit_code == \ 22 | shub_exceptions.NotFoundException.exit_code 23 | deploy_id1 = utils.store_status_url('http://linkA', 2) 24 | deploy_id2 = utils.store_status_url('http://linkB', 2) 25 | utils.store_status_url('http://linkC', 2) 26 | 27 | # get latest (deploy 3) 28 | result = runner.invoke(cli, []) 29 | assert result.exit_code == 0 30 | mocked.assert_called_with('http://linkC', timeout=300) 31 | 32 | # get deploy by id 33 | result = runner.invoke(cli, ["--id", deploy_id2]) 34 | assert result.exit_code == 0 35 | mocked.assert_called_with('http://linkB', timeout=300) 36 | 37 | # get non-existing deploy 38 | result = runner.invoke(cli, ["--id", deploy_id1]) 39 | assert result.exit_code == \ 40 | shub_exceptions.NotFoundException.exit_code 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2016 Scrapinghub, Inc 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of extruct nor the names of its contributors may be used 15 | to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /shub/tool.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | import click 4 | 5 | import shub 6 | from shub.utils import update_available 7 | 8 | 9 | HELP = """ 10 | shub is the Scrapinghub command-line client. It allows you to deploy projects 11 | or dependencies, schedule spiders, and retrieve scraped data or logs without 12 | leaving the command line. 
13 | """ 14 | 15 | SHORT_HELP = "Scrapinghub command-line client" 16 | 17 | EPILOG = """ 18 | For usage and help on a specific command, run it with a --help flag, e.g.: 19 | 20 | shub schedule --help 21 | """ 22 | 23 | CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']} 24 | 25 | 26 | @click.group(help=HELP, short_help=SHORT_HELP, epilog=EPILOG, 27 | context_settings=CONTEXT_SETTINGS) 28 | @click.version_option(shub.__version__) 29 | def cli(): 30 | update_url = update_available() 31 | if update_url: 32 | click.echo("INFO: A newer version of shub is available. Update " 33 | "via pip or get it at {}".format(update_url), err=True) 34 | 35 | 36 | commands = [ 37 | "bootstrap", 38 | "deploy", 39 | "login", 40 | "deploy_egg", 41 | "fetch_eggs", 42 | "deploy_reqs", 43 | "logout", 44 | "version", 45 | "items", 46 | "schedule", 47 | "log", 48 | "requests", 49 | "copy_eggs", 50 | "migrate_eggs", 51 | "image", 52 | "cancel", 53 | ] 54 | 55 | for command in commands: 56 | module_path = "shub." + command 57 | command_module = importlib.import_module(module_path) 58 | command_name = command.replace('_', '-') # easier to type 59 | cli.add_command(command_module.cli, command_name) 60 | -------------------------------------------------------------------------------- /tests/test_deploy_reqs.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import tempfile 4 | from unittest import mock 5 | 6 | from click.testing import CliRunner 7 | 8 | from shub import deploy_reqs 9 | 10 | from .utils import mock_conf 11 | 12 | 13 | class TestDeployReqs(unittest.TestCase): 14 | 15 | def setUp(self): 16 | self.runner = CliRunner() 17 | self.conf = mock_conf(self) 18 | 19 | @unittest.skip('flaky') 20 | def test_can_decompress_downloaded_packages_and_call_deploy_reqs(self): 21 | requirements_file = self._write_tmp_requirements_file() 22 | with mock.patch('shub.utils.build_and_deploy_egg') as m: 23 | self.runner.invoke( 24 | deploy_reqs.cli, 25 | ('-r', requirements_file), 26 | ) 27 | self.assertEqual(m.call_count, 2) 28 | for args, kwargs in m.call_args_list: 29 | project, endpoint, apikey = args 30 | self.assertEqual(project, 1) 31 | self.assertIn('https://app.zyte.com', endpoint) 32 | self.assertEqual(apikey, self.conf.apikeys['default']) 33 | 34 | def _write_tmp_requirements_file(self): 35 | basepath = 'tests/samples/deploy_reqs_sample_project/' 36 | eggs = ['other-egg-0.2.1.zip', 'inflect-0.2.5.tar.gz'] 37 | tmp_dir = tempfile.mkdtemp(prefix="shub-test-deploy-reqs") 38 | requirements_file = os.path.join(tmp_dir, 'requirements.txt') 39 | 40 | with open(requirements_file, 'w') as f: 41 | for egg in eggs: 42 | f.write(os.path.abspath(os.path.join(basepath, egg)) + "\n") 43 | 44 | return requirements_file 45 | -------------------------------------------------------------------------------- /shub/compat.py: -------------------------------------------------------------------------------- 1 | def to_unicode(text, encoding=None, errors='strict'): 2 | """Return the unicode representation of `text`. 3 | 4 | If `text` is already a ``unicode`` object, return it as-is. 5 | If `text` is a ``bytes`` object, decode it using `encoding`. 6 | 7 | Otherwise, raise an error. 
8 | 9 | """ 10 | if isinstance(text, str): 11 | return text 12 | if not isinstance(text, (bytes, bytearray)): 13 | raise TypeError('to_unicode must receive a bytes, str or unicode ' 14 | 'object, got %s' % type(text).__name__) 15 | if encoding is None: 16 | encoding = 'utf-8' 17 | return text.decode(encoding, errors) 18 | 19 | 20 | def to_bytes(text, encoding=None, errors='strict'): 21 | """Return the binary representation of `text`. 22 | 23 | If `text` is already a ``bytes`` object, return it as-is. 24 | If `text` is a ``unicode`` object, encode it using `encoding`. 25 | 26 | Otherwise, raise an error.""" 27 | if isinstance(text, bytes): 28 | return text 29 | if isinstance(text, bytearray): 30 | return bytes(text) 31 | if not isinstance(text, str): 32 | raise TypeError('to_bytes must receive a unicode, str or bytes ' 33 | 'object, got %s' % type(text).__name__) 34 | if encoding is None: 35 | encoding = 'utf-8' 36 | return text.encode(encoding, errors) 37 | 38 | 39 | def to_native_str(text, encoding=None, errors='strict'): 40 | """Return ``str`` representation of `text`. 41 | 42 | ``str`` representation means ``bytes`` in PY2 and ``unicode`` in PY3. 43 | 44 | """ 45 | return to_unicode(text, encoding, errors) 46 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = flake8,min,min-poetry,py,poetry 3 | 4 | [testenv] 5 | basepython = python3 6 | setenv = 7 | USING_TOX=1 8 | deps = 9 | -r tests/requirements.txt 10 | commands = 11 | pytest --cov=shub --cov-report=term-missing --cov-report=html --cov-report=xml {posargs:shub tests} 12 | 13 | [testenv:min] 14 | basepython = python3.9 15 | deps = 16 | {[testenv]deps} 17 | pipenv<2024.3.0 18 | 19 | [testenv:min-poetry] 20 | basepython = python3.9 21 | deps = 22 | {[testenv]deps} 23 | poetry-core<2 24 | 25 | [testenv:poetry] 26 | deps = 27 | {[testenv:min]deps} 28 | poetry-core 29 | poetry-plugin-export 30 | 31 | [testenv:freeze] 32 | install_command = 33 | python -m pip install {opts} {packages} 34 | deps = 35 | pyinstaller==4.10 36 | pytest 37 | packaging==20.4 38 | setuptools==59.8.0 # https://github.com/pypa/setuptools/issues/3089 39 | ; address https://github.com/pyinstaller/pyinstaller/issues/2162 with hidden imports 40 | setuptools>=44.0 41 | commands = 42 | pyinstaller --clean -y -F -n shub \ 43 | --distpath=./dist_bin \ 44 | --additional-hooks-dir=./freeze/hooks \ 45 | --runtime-hook=./freeze/hooks/runtime-hooks.py \ 46 | --icon=./freeze/spider-down.ico \ 47 | --hidden-import=packaging \ 48 | --hidden-import=packaging.specifiers \ 49 | ./shub/__main__.py 50 | pytest -vv {toxinidir}/freeze/tests/run.py 51 | 52 | [testenv:flake8] 53 | deps = 54 | flake8>=3.7.9 55 | commands = 56 | flake8 --exclude=.git,.tox,venv* {posargs:shub tests} 57 | 58 | [testenv:docs] 59 | changedir = docs 60 | deps = 61 | -rdocs/requirements.txt 62 | commands = 63 | sphinx-build -W -b html . {envtmpdir}/html 64 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Scrapinghub command line client 2 | =============================== 3 | 4 | .. image:: https://img.shields.io/pypi/v/shub.svg 5 | :target: https://pypi.python.org/pypi/shub 6 | :alt: PyPI Version 7 | 8 | .. 
image:: https://img.shields.io/pypi/pyversions/shub.svg 9 | :target: https://pypi.python.org/pypi/shub 10 | :alt: Python Versions 11 | 12 | .. image:: https://github.com/scrapinghub/shub/actions/workflows/tests.yml/badge.svg 13 | :target: https://github.com/scrapinghub/shub/actions/workflows/tests.yml 14 | :alt: Tests 15 | 16 | .. image:: https://img.shields.io/codecov/c/github/scrapinghub/shub/master.svg 17 | :target: https://codecov.io/github/scrapinghub/shub?branch=master 18 | :alt: Coverage report 19 | 20 | ``shub`` is the Scrapinghub command line client. It allows you to deploy 21 | projects or dependencies, schedule spiders, and retrieve scraped data or logs 22 | without leaving the command line. 23 | 24 | 25 | Requirements 26 | ------------ 27 | 28 | * Python >= 3.9 29 | 30 | 31 | Installation 32 | ------------ 33 | 34 | If you have ``pip`` installed on your system, you can install ``shub`` from 35 | the Python Package Index:: 36 | 37 | pip install shub 38 | 39 | Please note: 40 | 41 | * if you are using Python < 3.6, you should pin `shub` to `2.13.0` or lower. 42 | * if you are using Python < 3.9, you should pin `shub` to `2.15.4` or lower. 43 | 44 | We also supply stand-alone binaries. You can find them in our `latest GitHub 45 | release`_. 46 | 47 | .. _`latest Github release`: https://github.com/scrapinghub/shub/releases/latest 48 | 49 | 50 | Documentation 51 | ------------- 52 | 53 | Documentation is available online via Read the Docs: 54 | https://shub.readthedocs.io/, or in the ``docs`` directory. 55 | -------------------------------------------------------------------------------- /freeze/tests/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import shlex 4 | import shutil 5 | import tempfile 6 | from os.path import abspath, dirname, join 7 | from subprocess import Popen, PIPE 8 | 9 | import pytest 10 | from . 
import fakeserver 11 | 12 | SHUB = abspath(join(dirname(__file__), '../../dist_bin/shub')) 13 | 14 | 15 | @pytest.fixture(scope='module') 16 | def apipipe(): 17 | return fakeserver.run(("127.0.0.1", 7999)) 18 | 19 | 20 | @pytest.fixture 21 | def scrapyproject(request): 22 | cwd = os.getcwd() 23 | tmpdir = os.path.join(tempfile.mkdtemp(), 'project') 24 | 25 | def _fin(): 26 | os.chdir(cwd) 27 | shutil.rmtree(tmpdir, ignore_errors=True) 28 | request.addfinalizer(_fin) 29 | shutil.copytree(abspath(join(dirname(__file__), 'testproject')), tmpdir) 30 | os.chdir(tmpdir) 31 | return tmpdir 32 | 33 | 34 | def shub(shub_args): 35 | cmd = [SHUB] 36 | if isinstance(shub_args, str): 37 | shub_args = shlex.split(shub_args) 38 | if shub_args is not None: 39 | cmd.extend(shub_args) 40 | return Popen(cmd, stdout=PIPE, stderr=PIPE) 41 | 42 | 43 | def test_version(): 44 | stdout, stderr = shub('version').communicate() 45 | assert re.match(br'\d+[.]\d+[.]\d+$', stdout.strip()) 46 | 47 | 48 | def test_deploy_without_project(): 49 | stdout, stderr = shub('deploy').communicate() 50 | assert stdout == b'' 51 | assert b'Cannot find project' in stderr 52 | 53 | 54 | def test_deploy_default_project(apipipe, scrapyproject): 55 | p = shub('deploy') 56 | assert apipipe.poll(15) 57 | req = apipipe.recv() 58 | assert req['path'] == '/api/scrapyd/addversion.json' 59 | apipipe.send((200, None, {'status': 'ok'})) 60 | stdout, stderr = p.communicate() 61 | assert b'{"status": "ok"}' in stdout 62 | -------------------------------------------------------------------------------- /shub/fetch_eggs.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import urljoin 2 | 3 | import click 4 | import requests 5 | 6 | from shub.config import get_target_conf 7 | from shub.exceptions import InvalidAuthException, RemoteErrorException 8 | 9 | 10 | HELP = """ 11 | Download all eggs deployed to a Scrapy CLoud project into a zip file. 12 | 13 | You can either fetch to your default target (as defined in scrapinghub.yml), 14 | or explicitly supply a numerical project ID or a target defined in 15 | scrapinghub.yml (see shub deploy). 16 | """ 17 | 18 | SHORT_HELP = "Download project eggs from Scrapy Cloud" 19 | 20 | 21 | @click.command(help=HELP, short_help=SHORT_HELP) 22 | @click.argument("target", required=False, default='default') 23 | def cli(target): 24 | targetconf = get_target_conf(target) 25 | destfile = 'eggs-%s.zip' % targetconf.project_id 26 | fetch_eggs(targetconf.project_id, targetconf.endpoint, targetconf.apikey, 27 | destfile) 28 | 29 | 30 | def fetch_eggs(project, endpoint, apikey, destfile): 31 | auth = (apikey, '') 32 | url = urljoin(endpoint, "eggs/bundle.zip") 33 | rsp = requests.get(url=url, params={'project': project}, auth=auth, 34 | stream=True, timeout=300) 35 | 36 | _assert_response_is_valid(rsp) 37 | 38 | click.echo("Downloading eggs to %s" % destfile) 39 | 40 | with open(destfile, 'wb') as f: 41 | for chunk in rsp.iter_content(chunk_size=1024): 42 | if chunk: 43 | f.write(chunk) 44 | f.flush() 45 | 46 | 47 | def _assert_response_is_valid(rsp): 48 | if rsp.status_code == 403: 49 | raise InvalidAuthException 50 | elif rsp.status_code != 200: 51 | msg = 'Eggs could not be fetched. 
Status: %d' % rsp.status_code 52 | raise RemoteErrorException(msg) 53 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | setup( 5 | name='shub', 6 | version='2.16.0', 7 | packages=find_packages(exclude=('tests', 'tests.*')), 8 | url="https://shub.readthedocs.io/en/stable/", 9 | description='Scrapinghub Command Line Client', 10 | long_description=open('README.rst').read(), 11 | author='Scrapinghub', 12 | author_email='info@scrapinghub.com', 13 | maintainer='Scrapinghub', 14 | maintainer_email='info@scrapinghub.com', 15 | license='BSD', 16 | entry_points={ 17 | 'console_scripts': ['shub = shub.tool:cli'] 18 | }, 19 | include_package_data=True, 20 | zip_safe=False, 21 | python_requires='>=3.9', 22 | install_requires=[ 23 | 'click', 24 | 'docker', 25 | 'importlib-metadata; python_version < "3.10"', 26 | 'packaging', 27 | 'pip', 28 | 'PyYAML', 29 | 'retrying', 30 | 'requests', 31 | 'scrapinghub>=2.3.1', 32 | 'setuptools', 33 | 'tqdm==4.55.1', 34 | 'toml', 35 | ], 36 | classifiers=[ 37 | 'Development Status :: 5 - Production/Stable', 38 | 'Intended Audience :: Developers', 39 | 'Natural Language :: English', 40 | 'License :: OSI Approved :: BSD License', 41 | 'Programming Language :: Python', 42 | 'Programming Language :: Python :: 3.9', 43 | 'Programming Language :: Python :: 3.10', 44 | 'Programming Language :: Python :: 3.11', 45 | 'Programming Language :: Python :: 3.12', 46 | 'Programming Language :: Python :: 3.13', 47 | 'Programming Language :: Python :: 3.14', 48 | 'Operating System :: OS Independent', 49 | 'Environment :: Console', 50 | 'Topic :: Internet :: WWW/HTTP', 51 | ], 52 | ) 53 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _quickstart: 2 | 3 | ========== 4 | Quickstart 5 | ========== 6 | 7 | Installation 8 | ------------ 9 | 10 | If you have ``pip`` installed on your system, you can install shub from the 11 | Python Package Index:: 12 | 13 | pip install shub 14 | 15 | We also supply stand-alone binaries. You can find them in our `latest GitHub 16 | release`_. 17 | 18 | .. _`latest Github release`: https://github.com/scrapinghub/shub/releases/latest 19 | 20 | 21 | Getting help 22 | ------------ 23 | 24 | To see all available commands, run:: 25 | 26 | shub 27 | 28 | For help on a specific command, run it with a ``--help`` flag, e.g.:: 29 | 30 | shub schedule --help 31 | 32 | 33 | .. _basic-usage: 34 | 35 | Basic usage 36 | ----------- 37 | 38 | Start by logging in:: 39 | 40 | shub login 41 | 42 | This will save your Scrapinghub API key to a file in your home directory 43 | (``~/.scrapinghub.yml``) and is necessary for access to projects associated 44 | with your Scrapinghub account. Alternatively, you can set your Scrapinghub 45 | API key as an environment variable (``SHUB_APIKEY``), check :ref:`an appropriate 46 | section ` for details. 47 | 48 | Next, navigate to a Scrapy project that you wish to upload to Scrapinghub. You 49 | can deploy it to Scrapy Cloud via:: 50 | 51 | shub deploy 52 | 53 | On the first call, this will guide you through a wizard to save your project ID 54 | into a YAML file named ``scrapinghub.yml``, living next to your ``scrapy.cfg``. 55 | From anywhere within the project directory tree, you can now deploy via ``shub 56 | deploy``. 
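The wizard writes plain YAML. A minimal ``scrapinghub.yml`` sketch (with ``12345`` standing in for your own
project ID) looks like::

    projects:
      default: 12345

Additional targets (for example a separate production project) can be added to the same file later; see the
configuration section for the full format.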
57 | 58 | Next, schedule one of your spiders to run on Scrapy Cloud:: 59 | 60 | shub schedule myspider 61 | 62 | You can watch its log or the scraped items while the spider is running by 63 | supplying the job ID:: 64 | 65 | shub log -f 2/34 66 | shub items -f 2/34 67 | -------------------------------------------------------------------------------- /freeze/tests/fakeserver.py: -------------------------------------------------------------------------------- 1 | import json 2 | import multiprocessing 3 | from threading import Thread 4 | from socketserver import TCPServer 5 | from http.server import SimpleHTTPRequestHandler 6 | import urllib.parse 7 | 8 | class Handler(SimpleHTTPRequestHandler): 9 | 10 | def _do_any(self): 11 | method = self.command 12 | path, _, querystr = self.path.partition('?') 13 | query = urllib.parse.parse_qs(querystr) 14 | content_len = int(self.headers.get('content-length', 0)) 15 | body = self.rfile.read(content_len) 16 | headers = self.headers.get_params() 17 | print(self) 18 | 19 | self.server.pipe.send({ 20 | 'path': path, 'query': query, 'body': body, 21 | 'method': self.command, 'headers': headers, 22 | }) 23 | if not self.server.pipe.poll(10): 24 | self.send_error(500, 'Pipe hung') 25 | 26 | status, headers, body = self.server.pipe.recv() 27 | if not isinstance(body, bytes): 28 | body = json.dumps(body).encode('utf8') + b'\n' 29 | 30 | self.send_response(status) 31 | for hn, hv in headers or (): 32 | self.send_header(hn, hv) 33 | self.end_headers() 34 | self.wfile.write(body) 35 | 36 | do_GET = _do_any 37 | do_PUT = _do_any 38 | do_POST = _do_any 39 | do_DELETE = _do_any 40 | do_PATCH = _do_any 41 | 42 | 43 | def threadit(target, *args, **kw): 44 | t = Thread(target=target, name=target.__name__, args=args, kwargs=kw) 45 | t.daemon = True 46 | t.start() 47 | return t 48 | 49 | 50 | def run(bind_at): 51 | p1, p2 = multiprocessing.Pipe() 52 | 53 | class MyTCPServer(TCPServer): 54 | allow_reuse_address = True 55 | pipe = p2 56 | 57 | httpd = MyTCPServer(bind_at, Handler) 58 | threadit(httpd.serve_forever) 59 | return p1 60 | 61 | 62 | -------------------------------------------------------------------------------- /shub/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | 4 | from shub.utils import job_resource_iter, get_job 5 | 6 | import click 7 | 8 | 9 | HELP = """ 10 | Given a job ID, fetch the log of that job from Scrapy Cloud and print it. 
11 | 12 | A job ID consists of the Scrapinghub project ID, the numerical spider ID, and 13 | the job ID, separated by forward slashes, e.g.: 14 | 15 | shub log 12345/2/15 16 | 17 | You can also provide the Scrapinghub job URL instead: 18 | 19 | shub log https://app.zyte.com/p/12345/2/15 20 | 21 | You can omit the project ID if you have a default target defined in your 22 | scrapinghub.yml: 23 | 24 | shub log 2/15 25 | 26 | Or use any target defined in your scrapinghub.yml: 27 | 28 | shub log production/2/15 29 | 30 | If the job is still running, you can watch the log as it is being written by 31 | providing the -f flag: 32 | 33 | shub log -f 2/15 34 | """ 35 | 36 | SHORT_HELP = "Fetch log from Scrapy Cloud" 37 | 38 | 39 | @click.command(help=HELP, short_help=SHORT_HELP) 40 | @click.argument('job_id') 41 | @click.option('-f', '--follow', help='output new log entries as they are ' 42 | 'produced', is_flag=True) 43 | @click.option('-n', '--tail', help='output last N log entries only', type=int) 44 | @click.option('--json', 'json_', help='output log entries in JSON', is_flag=True, default=False) 45 | def cli(job_id, follow, tail, json_): 46 | job = get_job(job_id) 47 | for item in job_resource_iter(job, job.logs, follow=follow, tail=tail, output_json=json_): 48 | if json_: 49 | click.echo(item) 50 | else: 51 | click.echo( 52 | "{} {} {}".format( 53 | datetime.utcfromtimestamp(item['time']/1000), 54 | logging.getLevelName(int(item['level'])), 55 | item['message'] 56 | ) 57 | ) 58 | -------------------------------------------------------------------------------- /tests/image/conftest.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from unittest import mock 3 | 4 | import pytest 5 | 6 | from shub.image.utils import ProgressBar 7 | 8 | try: 9 | # https://stackoverflow.com/a/55000090 10 | from inspect import getfullargspec as get_args 11 | except ImportError: 12 | from inspect import getargspec as get_args 13 | 14 | from .utils import ( 15 | FakeProjectDirectory, add_scrapy_fake_config, add_sh_fake_config, 16 | add_fake_dockerfile, add_fake_setup_py, 17 | ) 18 | 19 | 20 | @pytest.fixture 21 | def docker_client_mock(): 22 | """Docker client mock""" 23 | client_mock = mock.Mock() 24 | with mock.patch('shub.image.utils.get_docker_client') as m: 25 | m.return_value = client_mock 26 | yield client_mock 27 | 28 | 29 | @pytest.fixture 30 | def project_dir(): 31 | """Fake project directory""" 32 | with FakeProjectDirectory() as tmpdir: 33 | add_scrapy_fake_config(tmpdir) 34 | add_sh_fake_config(tmpdir) 35 | add_fake_dockerfile(tmpdir) 36 | add_fake_setup_py(tmpdir) 37 | yield tmpdir 38 | 39 | 40 | @pytest.fixture 41 | def monkeypatch_bar_rate(monkeypatch): 42 | # Converting to List instead to unpacking the Tuple 43 | # because get_args returns different tuple sizes between py versions. 
44 | args = list(get_args(ProgressBar.format_meter))[0] 45 | rate_arg_idx = args.index('rate') 46 | 47 | def override_rate(func): 48 | 49 | @wraps(func) 50 | def wrapper(*args, **kwargs): 51 | args = list(args) 52 | if 'rate' in args: 53 | args[rate_arg_idx] = 10 ** 6 54 | elif 'rate' in kwargs: 55 | kwargs['rate'] = 10 ** 6 56 | return func(*args, **kwargs) 57 | 58 | return wrapper 59 | 60 | monkeypatch.setattr('shub.image.utils.ProgressBar.format_meter', 61 | staticmethod(override_rate(ProgressBar.format_meter))) 62 | -------------------------------------------------------------------------------- /shub/login.py: -------------------------------------------------------------------------------- 1 | import click 2 | import requests 3 | from urllib.parse import urljoin 4 | 5 | from shub.config import (load_shub_config, GLOBAL_SCRAPINGHUB_YML_PATH, 6 | ShubConfig) 7 | from shub.exceptions import AlreadyLoggedInException 8 | from shub.utils import update_yaml_dict 9 | 10 | 11 | HELP = """ 12 | Add your Scrapinghub API key to your global configuration file 13 | (~/.scrapinghub.yml). This is necessary to gain access to projects associated 14 | with your Scrapinghub account. 15 | 16 | You can find your API key in Scrapinghub's dashboard: 17 | https://app.zyte.com/account/apikey 18 | """ 19 | 20 | SHORT_HELP = "Save your Scrapinghub API key" 21 | 22 | 23 | @click.command(help=HELP, short_help=SHORT_HELP) 24 | def cli(): 25 | global_conf = load_shub_config(load_local=False, load_env=False) 26 | if 'default' in global_conf.apikeys: 27 | raise AlreadyLoggedInException 28 | 29 | conf = load_shub_config() 30 | key = _get_apikey( 31 | suggestion=conf.apikeys.get('default'), 32 | endpoint=global_conf.endpoints.get('default'), 33 | ) 34 | with update_yaml_dict(GLOBAL_SCRAPINGHUB_YML_PATH) as conf: 35 | conf.setdefault('apikeys', {}) 36 | conf['apikeys']['default'] = key 37 | 38 | 39 | def _get_apikey(suggestion='', endpoint=None): 40 | suggestion_txt = ' (%s)' % suggestion if suggestion else '' 41 | click.echo( 42 | "Enter your API key from https://app.zyte.com/o/settings/apikey" 43 | ) 44 | while True: 45 | key = input('API key%s: ' % suggestion_txt) or suggestion 46 | click.echo("Validating API key...") 47 | if _is_valid_apikey(key, endpoint=endpoint): 48 | click.echo("API key is OK, you are logged in now.") 49 | return key 50 | else: 51 | click.echo("API key failed, try again.") 52 | 53 | 54 | def _is_valid_apikey(key, endpoint=None): 55 | endpoint = endpoint or ShubConfig.DEFAULT_ENDPOINT 56 | validate_api_key_endpoint = urljoin(endpoint, "v2/users/me") 57 | r = requests.get(validate_api_key_endpoint, params={'apikey': key}) 58 | return r.status_code == 200 59 | -------------------------------------------------------------------------------- /tests/image/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | from contextlib import contextmanager 5 | 6 | 7 | SH_CONFIG_FILE = """ 8 | projects: 9 | dev: 10 | id: 12345 11 | image: registry.io/user/project 12 | xyz: 13 | id: 32167 14 | image: images.scrapinghub.com/project/32167 15 | endpoints: 16 | dev: https://dash-fake 17 | apikeys: 18 | default: abcdef 19 | """ 20 | 21 | SH_SETUP_FILE = """ 22 | from setuptools import setup 23 | setup( 24 | name = 'project', version = '1.0', 25 | entry_points = {'scrapy': ['settings = test.settings']}, 26 | scripts = ['bin/scriptA.py', 'scriptB.py'] 27 | ) 28 | """ 29 | 30 | 31 | @contextmanager 32 | def FakeProjectDirectory(): 
33 | tmpdir = os.path.realpath(tempfile.mkdtemp()) 34 | current = os.getcwd() 35 | os.chdir(tmpdir) 36 | try: 37 | yield tmpdir 38 | finally: 39 | os.chdir(current) 40 | shutil.rmtree(tmpdir) 41 | 42 | 43 | def add_scrapy_fake_config(tmpdir): 44 | # add fake scrapy.cfg 45 | config_path = os.path.join(tmpdir, 'scrapy.cfg') 46 | with open(config_path, 'w') as config_file: 47 | config_file.write("[settings]\ndefault=test.settings") 48 | 49 | 50 | def add_sh_fake_config(tmpdir): 51 | # add fake SH config 52 | sh_config_path = os.path.join(tmpdir, 'scrapinghub.yml') 53 | with open(sh_config_path, 'w') as sh_config_file: 54 | sh_config_file.write(SH_CONFIG_FILE) 55 | 56 | 57 | def add_fake_requirements(tmpdir): 58 | """Add fake requirements""" 59 | reqs_path = os.path.join(tmpdir, 'fake-requirements.txt') 60 | with open(reqs_path, 'w') as reqs_file: 61 | reqs_file.write("mock\nrequests") 62 | 63 | 64 | def add_fake_dockerfile(tmpdir): 65 | """Add fake Dockerfile""" 66 | docker_path = os.path.join(tmpdir, 'Dockerfile') 67 | with open(docker_path, 'w') as docker_file: 68 | docker_file.write("FROM python:2.7") 69 | 70 | 71 | def add_fake_setup_py(tmpdir): 72 | """Add fake setup.py for extract scripts tests""" 73 | setup_path = os.path.join(tmpdir, 'setup.py') 74 | with open(setup_path, 'w') as setup_file: 75 | setup_file.write(SH_SETUP_FILE) 76 | -------------------------------------------------------------------------------- /.github/workflows/freeze-release-publish.yml: -------------------------------------------------------------------------------- 1 | name: Freeze, Release & Publish 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - v* 9 | pull_request: 10 | branches: 11 | - master 12 | 13 | jobs: 14 | freeze: 15 | name: "Freeze: ${{ matrix.os }}" 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | os: [ubuntu-latest, macos-latest, windows-latest] 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - name: Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: "3.10" 29 | 30 | - name: Install tox 31 | run: pip install tox 32 | 33 | - name: Build binary 34 | run: tox -e freeze 35 | 36 | - name: Pack binary (Windows) 37 | if: ${{ runner.os == 'Windows' }} 38 | run: 7z a shub-Windows.zip dist_bin/shub.exe 39 | 40 | - name: Pack binary (Linux/macOS) 41 | if: ${{ runner.os != 'Windows' }} 42 | run: tar -czvf shub-${{ runner.os }}.tar.gz dist_bin/shub 43 | 44 | - name: Upload binary 45 | uses: actions/upload-artifact@v4 46 | with: 47 | name: shub-${{ runner.os }} 48 | path: | 49 | shub-${{ runner.os }}.tar.gz 50 | shub-${{ runner.os }}.zip 51 | 52 | release: 53 | if: startsWith(github.ref, 'refs/tags/v') 54 | needs: freeze 55 | runs-on: ubuntu-latest 56 | 57 | steps: 58 | - name: Download binaries 59 | uses: actions/download-artifact@v2 60 | with: 61 | path: binaries 62 | 63 | - name: Display structure of downloaded files 64 | run: ls -R binaries 65 | 66 | - name: Draft release 67 | uses: softprops/action-gh-release@v1 68 | with: 69 | draft: true 70 | files: binaries/** 71 | 72 | publish: 73 | if: startsWith(github.ref, 'refs/tags/v') 74 | runs-on: ubuntu-latest 75 | 76 | steps: 77 | - uses: actions/checkout@v2 78 | 79 | - name: Set up Python 80 | uses: actions/setup-python@v5 81 | with: 82 | python-version: "3.10" 83 | 84 | - name: Publish to PyPI 85 | run: | 86 | pip install --upgrade pip 87 | pip install --upgrade setuptools wheel twine 88 | python setup.py sdist bdist_wheel 89 | export TWINE_USERNAME=__token__ 90 | 
export TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }} 91 | twine upload dist/* 92 | -------------------------------------------------------------------------------- /shub/deploy_reqs.py: -------------------------------------------------------------------------------- 1 | import click 2 | import os 3 | import tempfile 4 | import shutil 5 | 6 | from shub import DEPLOY_DOCS_LINK 7 | from shub.config import get_target_conf 8 | from shub.utils import (build_and_deploy_eggs, decompress_egg_files, 9 | download_from_pypi) 10 | 11 | 12 | HELP = """ 13 | Build eggs of your project's requirements and deploy them to Scrapy Cloud. 14 | 15 | You can either deploy to your default target (as defined in scrapinghub.yml), 16 | or explicitly supply a numerical project ID or a target defined in 17 | scrapinghub.yml (see shub deploy). 18 | 19 | By default, requirements will be read from requirements.txt. You may supply a 20 | different file name with the -r option: 21 | 22 | shub deploy-reqs -r myreqs.txt 23 | 24 | The requirements file must be in a format parsable by pip. 25 | """ 26 | 27 | SHORT_HELP = "[DEPRECATED] Build and deploy eggs from requirements.txt" 28 | 29 | 30 | @click.command(help=HELP, short_help=SHORT_HELP) 31 | @click.argument("target", required=False, default="default") 32 | @click.option("-r", "--requirements-file", default='requirements.txt', 33 | type=click.STRING) 34 | def cli(target, requirements_file): 35 | click.secho( 36 | "deploy-reqs was deprecated, define a requirements file in your " 37 | "scrapinghub.yml instead. See {}".format(DEPLOY_DOCS_LINK), 38 | err=True, fg='yellow', 39 | ) 40 | main(target, requirements_file) 41 | 42 | 43 | def main(target, requirements_file): 44 | targetconf = get_target_conf(target) 45 | requirements_full_path = os.path.abspath(requirements_file) 46 | eggs_tmp_dir = _mk_and_cd_eggs_tmpdir() 47 | _download_egg_files(eggs_tmp_dir, requirements_full_path) 48 | decompress_egg_files() 49 | build_and_deploy_eggs(targetconf.project_id, targetconf.endpoint, 50 | targetconf.apikey) 51 | 52 | 53 | def _mk_and_cd_eggs_tmpdir(): 54 | tmpdir = tempfile.mkdtemp(prefix="eggs") 55 | os.chdir(tmpdir) 56 | os.mkdir('eggs') 57 | os.chdir('eggs') 58 | return os.path.join(tmpdir, 'eggs') 59 | 60 | 61 | def _download_egg_files(eggs_dir, requirements_file): 62 | editable_src_dir = tempfile.mkdtemp(prefix='pipsrc') 63 | 64 | click.echo('Downloading eggs...') 65 | try: 66 | download_from_pypi(eggs_dir, reqfile=requirements_file, 67 | extra_args=["--src", editable_src_dir]) 68 | finally: 69 | shutil.rmtree(editable_src_dir, ignore_errors=True) 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/vim,python 3 | # Edit at https://www.gitignore.io/?templates=vim,python 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # pyenv 72 | .python-version 73 | 74 | # pipenv 75 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 76 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 77 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 78 | # install all needed dependencies. 79 | #Pipfile.lock 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | .spyproject 90 | 91 | # Rope project settings 92 | .ropeproject 93 | 94 | # Mr Developer 95 | .mr.developer.cfg 96 | .project 97 | .pydevproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | .dmypy.json 105 | dmypy.json 106 | 107 | # Pyre type checker 108 | .pyre/ 109 | 110 | ### Vim ### 111 | # Swap 112 | [._]*.s[a-v][a-z] 113 | [._]*.sw[a-p] 114 | [._]s[a-rt-v][a-z] 115 | [._]ss[a-gi-z] 116 | [._]sw[a-p] 117 | 118 | # Session 119 | Session.vim 120 | Sessionx.vim 121 | 122 | # Temporary 123 | .netrwhist 124 | *~ 125 | # Auto-generated tag files 126 | tags 127 | # Persistent undo 128 | [._]*.un~ 129 | 130 | # End of https://www.gitignore.io/api/vim,python 131 | 132 | .vscode 133 | -------------------------------------------------------------------------------- /shub/image/run/wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | A helper wrapper over start-crawl to run a custom image locally. 4 | 5 | The wrapper is used in `shub image run` command as an entrypoint 6 | to create a FIFO file inside a Docker container, enforce using it 7 | to communicate with crawl process and start the crawl process. 8 | 9 | The initial version handles and prints only LOG entries to mimic 10 | Scrapy behavior when running locally, however it could be easily 11 | extended in the future. 12 | 13 | Reading about SH custom image contract should bring you more context 14 | https://shub.readthedocs.io/en/stable/custom-images-contract.html. 15 | 16 | FIFO based communication protocol is described well in 17 | https://doc.scrapinghub.com/scrapy-cloud-write-entrypoint.html 18 | 19 | TODO As a custom image isn't necessarily based on Python, the wrapper 20 | should be rewritten in the future with something more basic and 21 | lightweight, to get rid of dependence on Python. 
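For illustration, every line written to the FIFO starts with a three-letter
entry type, a space, and a JSON payload; the only type handled below is LOG,
whose entries look roughly like this (field values are made up):

    LOG {"time": 1449834387621, "level": 20, "message": "Spider opened"}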
22 | """ 23 | 24 | 25 | import os 26 | import sys 27 | import json 28 | import logging 29 | import datetime 30 | from multiprocessing import Process 31 | from shutil import which 32 | 33 | 34 | def _consume_from_fifo(fifo_path): 35 | """Start reading/printing entries from FIFO.""" 36 | with open(fifo_path) as fifo: 37 | while True: 38 | line = fifo.readline() 39 | # returns an empty string only in the end of the file 40 | if not line: 41 | return 42 | entry_type, entry_raw = line[:3], line[4:] 43 | _print_fifo_entry(entry_type, json.loads(entry_raw)) 44 | 45 | 46 | def _print_fifo_entry(message_type, message): 47 | """Print only specific entries.""" 48 | if message_type == 'LOG': 49 | timestamp = _millis_to_str(message['time']) 50 | loglevel = logging.getLevelName(message['level']) 51 | # mimic Scrapy logging format as much as possible 52 | print('{} {} {}'.format(timestamp, loglevel, message['message'])) 53 | 54 | 55 | def _millis_to_str(millis): 56 | """Convert a datatime in ms to a formatted string.""" 57 | datetime_ts = datetime.datetime.fromtimestamp(millis / 1000.0) 58 | return datetime_ts.strftime('%Y-%m-%d %H:%M:%S') 59 | 60 | 61 | def main(): 62 | """Main wrapper entrypoint.""" 63 | # create a named pipe for communication 64 | fifo_path = os.environ.get('SHUB_FIFO_PATH') 65 | os.mkfifo(fifo_path) 66 | # create and start a consumer process to read from the fifo: 67 | # non-daemon to allow it to finish reading from pipe before exit. 68 | Process(target=_consume_from_fifo, args=[fifo_path]).start() 69 | # replace current process with original start-crawl 70 | os.execv(which('start-crawl'), sys.argv) 71 | 72 | 73 | if __name__ == '__main__': 74 | sys.exit(main()) 75 | -------------------------------------------------------------------------------- /docs/scheduling.rst: -------------------------------------------------------------------------------- 1 | .. _scheduling: 2 | 3 | ===================================== 4 | Scheduling jobs and fetching job data 5 | ===================================== 6 | 7 | shub allows you to schedule a spider run from the command line:: 8 | 9 | shub schedule SPIDER 10 | 11 | where ``SPIDER`` should match the spider's name. By default, shub will schedule 12 | the spider in your default project (as defined in ``scrapinghub.yml``). 
You may 13 | also explicitly specify the project to use:: 14 | 15 | shub schedule project_alias_or_id/SPIDER 16 | 17 | You can supply spider arguments and job-specific settings through the ``-a`` 18 | and ``-s`` options:: 19 | 20 | $ shub schedule myspider -a ARG1=VALUE -a ARG2=VALUE 21 | Spider myspider scheduled, job ID: 12345/2/15 22 | Watch the log on the command line: 23 | shub log -f 2/15 24 | or print items as they are being scraped: 25 | shub items -f 2/15 26 | or watch it running in Scrapinghub's web interface: 27 | https://app.zyte.com/p/12345/job/2/15 28 | 29 | :: 30 | 31 | $ shub schedule 33333/myspider -s LOG_LEVEL=DEBUG 32 | Spider myspider scheduled, job ID: 33333/2/15 33 | Watch the log on the command line: 34 | shub log -f 2/15 35 | or print items as they are being scraped: 36 | shub items -f 2/15 37 | or watch it running in Scrapinghub's web interface: 38 | https://app.zyte.com/p/33333/job/2/15 39 | 40 | You can also specify the amount of Scrapy Cloud units (``-u``) and the priority (``-p``):: 41 | 42 | $ shub schedule myspider -p 3 -u 3 43 | Spider myspider scheduled, job ID: 12345/2/16 44 | Watch the log on the command line: 45 | shub log -f 2/16 46 | or print items as they are being scraped: 47 | shub items -f 2/16 48 | or watch it running in Scrapinghub's web interface: 49 | https://app.zyte.com/p/12345/job/2/16 50 | 51 | shub provides commands to retrieve log entries, scraped items, or requests from 52 | jobs. If the job is still running, you can provide the ``-f`` (follow) option 53 | to receive live updates:: 54 | 55 | $ shub log -f 2/15 56 | 2016-01-02 16:38:35 INFO Log opened. 57 | 2016-01-02 16:38:35 INFO [scrapy.log] Scrapy 1.0.3.post6+g2d688cd started 58 | ... 59 | # shub will keep updating the log until the job finishes or you hit CTRL+C 60 | 61 | :: 62 | 63 | $ shub items 2/15 64 | {"name": "Example product", description": "Example description"} 65 | {"name": "Another product", description": "Another description"} 66 | 67 | :: 68 | 69 | $ shub requests 1/1/1 70 | {"status": 200, "fp": "1ff11f1543809f1dbd714e3501d8f460b92a7a95", "rs": 138137, "_key": "1/1/1/0", "url": "http://blog.scrapinghub.com", "time": 1449834387621, "duration": 238, "method": "GET"} 71 | {"status": 200, "fp": "418a0964a93e139166dbf9b33575f10f31f17a1", "rs": 138137, "_key": "1/1/1/0", "url": "http://blog.scrapinghub.com", "time": 1449834390881, "duration": 163, "method": "GET"} 72 | -------------------------------------------------------------------------------- /shub/cancel.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from scrapinghub import ScrapinghubAPIError 4 | from scrapinghub.client.utils import parse_job_key 5 | 6 | from shub.utils import get_scrapinghub_client_from_config 7 | from shub.config import get_target_conf 8 | from shub.exceptions import ( 9 | ShubException, 10 | BadParameterException, 11 | SubcommandException, 12 | ) 13 | 14 | 15 | HELP = """ 16 | Cancel multiple jobs from Scrapy Cloud. 
17 | 18 | The cancel command expects the project ID (target) followed by 19 | the pair containing the spider ID and Job ID: 20 | 21 | shub cancel 12345 1/1 1/2 1/3 22 | 23 | If the project ID is not defined it is going to use the default 24 | project (as defined in scrapinghub.yml): 25 | 26 | shub cancel 1/1 1/2 1/3 27 | 28 | The cancel command requires a confirmation that could be skipped 29 | with the flag --force/-f: 30 | 31 | shub cancel --force 1/1 1/2 1/3 32 | """ 33 | 34 | 35 | SHORT_HELP = "Cancel multiple jobs from Scrapy Cloud" 36 | 37 | 38 | @click.command(help=HELP, short_help=SHORT_HELP) 39 | @click.argument("target_or_key") 40 | @click.argument("keys", nargs=-1) 41 | @click.option('--force', '-f', is_flag=True, 42 | help='It ignores the confirmation prompt') 43 | def cli(target_or_key, keys, force): 44 | # target_or_key contains a target or just another job key 45 | if "/" in target_or_key: 46 | keys = (target_or_key,) + keys 47 | target = "default" 48 | else: 49 | target = target_or_key 50 | 51 | targetconf = get_target_conf(target) 52 | project_id = targetconf.project_id 53 | client = get_scrapinghub_client_from_config(targetconf) 54 | project = client.get_project(project_id) 55 | 56 | try: 57 | job_keys = [validate_job_key(project_id, key) for key in keys] 58 | except (BadParameterException, SubcommandException) as err: 59 | click.echo('Error during keys validation: %s' % str(err)) 60 | exit(1) 61 | 62 | if not force: 63 | jobs_str = ", ".join([str(job) for job in job_keys]) 64 | click.confirm( 65 | 'Do you want to cancel these %s jobs? \n\n%s \n\nconfirm?' 66 | % (len(job_keys), jobs_str), 67 | abort=True 68 | ) 69 | 70 | try: 71 | output = project.jobs.cancel( 72 | keys=[str(job) for job in job_keys] 73 | ) 74 | except (ValueError, ScrapinghubAPIError) as err: 75 | raise ShubException(str(err)) 76 | 77 | click.echo(output) 78 | 79 | 80 | def validate_job_key(project_id, short_key): 81 | job_key = f"{project_id}/{short_key}" 82 | 83 | if len(short_key.split("/")) != 2: 84 | raise BadParameterException( 85 | "keys must be defined as /" 86 | ) 87 | 88 | try: 89 | return parse_job_key(job_key) 90 | except ValueError as err: 91 | raise BadParameterException(str(err)) 92 | except Exception as err: 93 | raise SubcommandException(str(err)) 94 | -------------------------------------------------------------------------------- /shub/copy_eggs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.parse import urljoin 3 | from tempfile import mkdtemp 4 | import click 5 | import requests 6 | from shutil import rmtree 7 | 8 | from shub.config import get_target_conf 9 | from shub.fetch_eggs import fetch_eggs 10 | from shub.utils import decompress_egg_files, _deploy_dependency_egg 11 | 12 | SHORT_HELP = "Sync eggs from one project with other project" 13 | 14 | HELP = SHORT_HELP + """ 15 | 16 | Fetch all eggs from one project and upload them to other project. 
This allows 17 | you to easily clone requirements from an old project into a new one.""" 18 | 19 | 20 | @click.command(help=HELP, short_help=SHORT_HELP) 21 | @click.option("--source_project", 22 | prompt="From which projects should I download eggs?") 23 | @click.option("--new_project", 24 | prompt="To which project should I upload eggs?") 25 | @click.option("-m", "--copy-main", default=False, is_flag=True, 26 | help="copy main Scrapy project egg") 27 | def cli(source_project, new_project, copy_main): 28 | source = get_target_conf(source_project) 29 | target = get_target_conf(new_project) 30 | copy_eggs(source.project_id, source.endpoint, source.apikey, 31 | target.project_id, target.endpoint, target.apikey, 32 | copy_main) 33 | 34 | 35 | def copy_eggs(project, endpoint, apikey, new_project, new_endpoint, new_apikey, 36 | copy_main): 37 | egg_versions = get_eggs_versions(project, endpoint, apikey) 38 | temp_dir = mkdtemp() 39 | destfile = os.path.join(temp_dir, 'eggs-%s.zip' % project) 40 | fetch_eggs(project, endpoint, apikey, destfile) 41 | # Decompress project bundle (so temp_dir will contain all project eggs) 42 | decompress_egg_files(directory=temp_dir) 43 | destdir = os.path.join(temp_dir, f"eggs-{project}") 44 | for egg_name in os.listdir(destdir): 45 | if egg_name == "__main__.egg" and not copy_main: 46 | continue 47 | name = egg_name.partition(".egg")[0] 48 | try: 49 | version = egg_versions[name] 50 | except KeyError: 51 | click.secho( 52 | "WARNING: The following egg belongs to a Dash Addon: %s. " 53 | "Please manually enable the corresponding Addon in the target " 54 | "project." % name, 55 | fg='yellow', 56 | bold=True, 57 | ) 58 | continue 59 | egg_path = os.path.join(destdir, egg_name) 60 | egg_info = (egg_name, egg_path) 61 | _deploy_dependency_egg(new_project, new_endpoint, new_apikey, 62 | name=name, version=version, egg_info=egg_info) 63 | rmtree(temp_dir) 64 | 65 | 66 | def get_eggs_versions(project, endpoint, apikey): 67 | click.echo(f'Getting eggs list from project {project}...') 68 | list_endpoint = urljoin(endpoint, "eggs/list.json") 69 | response = requests.get(list_endpoint, params={"project": project}, 70 | auth=(apikey, '')) 71 | response.raise_for_status() 72 | obj = response.json() 73 | return {x['name']: x['version'] for x in obj['eggs']} 74 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | from unittest import mock 4 | 5 | from click.testing import CliRunner 6 | from tqdm.utils import _supports_unicode 7 | 8 | from shub import config 9 | 10 | 11 | class AssertInvokeRaisesMixin: 12 | def assertInvokeRaises(self, exc, *args, **kwargs): 13 | """ 14 | Invoke self.runner (or a new runner if nonexistent) with given *args 15 | and **kwargs, assert that it raised an exception of type exc, and 16 | return the runner's result. 
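        For example, as used by the command tests in this suite:

            result = self.assertInvokeRaises(ShubException, cancel.cli,
                                             ('123456', '1/1', '1/2'),
                                             input='y\n')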
17 | """ 18 | runner = getattr(self, 'runner', None) or CliRunner() 19 | kwargs['standalone_mode'] = False 20 | result = runner.invoke(*args, **kwargs) 21 | self.assertIsInstance(result.exception, exc) 22 | return result 23 | 24 | 25 | def mock_conf(testcase, target=None, attr=None, conf=None): 26 | if not conf: 27 | conf = config.ShubConfig() 28 | conf.projects.update({ 29 | 'default': 1, 30 | 'prod': 2, 31 | 'vagrant': 'vagrant/3', 32 | 'custom1': {'id': 4, 'image': False}, 33 | 'custom2': {'id': 5, 'image': True}, 34 | 'custom3': {'id': 6, 'image': 'custom/image'}, 35 | }) 36 | conf.endpoints.update({ 37 | 'vagrant': 'https://vagrant_ep/api/', 38 | }) 39 | conf.apikeys.update({ 40 | 'default': 32 * '1', 41 | 'vagrant': 32 * '2', 42 | }) 43 | conf.version = 'version' 44 | if target: 45 | if attr: 46 | patcher = mock.patch.object(target, attr, return_value=conf, 47 | autospec=True) 48 | else: 49 | patcher = mock.patch(target, return_value=conf, autospec=True) 50 | else: 51 | patcher = mock.patch('shub.config.load_shub_config', return_value=conf, 52 | autospec=True) 53 | patcher.start() 54 | testcase.addCleanup(patcher.stop) 55 | return conf 56 | 57 | 58 | def _is_tqdm_in_ascii_mode(): 59 | """Small helper deciding about placeholders for tqdm progress bars.""" 60 | with CliRunner().isolation(): 61 | return not _supports_unicode(sys.stdout) 62 | 63 | 64 | def format_expected_progress(progress): 65 | """Replace unicode symbols in progress string for tqdm in ascii mode.""" 66 | if _is_tqdm_in_ascii_mode(): 67 | to_replace = {'█': '#', '▏': '2', '▎': '3', '▌': '5', '▋': '6'} 68 | for sym in to_replace: 69 | progress = progress.replace(sym, to_replace[sym]) 70 | return progress 71 | 72 | 73 | def clean_progress_output(output): 74 | """Return output cleaned from \\r, \\n, and ANSI escape sequences""" 75 | return re.sub( 76 | r"""(?x) # Matches: 77 | \n|\r| # 1. newlines or carriage returns, or 78 | (\x1b\[|\x9b) # 2. ANSI control sequence introducer ("ESC[" or single 79 | # byte \x9b) + 80 | [^@-_]*[@-_]| # private mode characters + command character, or 81 | \x1b[@-_] # 3. ANSI control codes without sequence introducer 82 | # ("ESC" + single command character) 83 | """, 84 | '', output) 85 | -------------------------------------------------------------------------------- /shub/image/upload.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from shub.config import list_targets_callback 4 | from shub.image import build 5 | from shub.image import push 6 | from shub.image import deploy 7 | from shub.image import utils 8 | 9 | 10 | SHORT_HELP = "Shortcut command for build-push-deploy chain" 11 | HELP = """ 12 | Upload command is a handy shortcut to rebuild and redeploy your project 13 | (in other words it does consecutive calls of build-push-deploy cmds). 14 | 15 | Obviously it accepts all the options for the commands above. 
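An illustrative invocation, assuming a "default" target is defined in your
scrapinghub.yml:

    shub image upload default --version 1.0 --skip-tests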
16 | """ 17 | 18 | 19 | @click.command(help=HELP, short_help=SHORT_HELP) 20 | @click.argument("target", required=False, default="default") 21 | @click.option("-l", "--list-targets", is_flag=True, is_eager=True, 22 | expose_value=False, callback=list_targets_callback, 23 | help="List available project names defined in your config") 24 | @click.option("-d", "--debug", help="debug mode", is_flag=True, 25 | callback=utils.deprecate_debug_parameter) 26 | @click.option("-v", "--verbose", is_flag=True, 27 | help="stream upload logs to console") 28 | @click.option("-V", "--version", help="release version") 29 | @click.option("--username", help="docker registry name") 30 | @click.option("--password", help="docker registry password") 31 | @click.option("--email", help="docker registry email") 32 | @click.option("--apikey", help="SH apikey to use built-in registry") 33 | @click.option("--insecure", is_flag=True, help="use insecure registry") 34 | @click.option("--async", "async_", is_flag=True, help="[DEPRECATED] enable asynchronous mode", 35 | callback=utils.deprecate_async_parameter) 36 | @click.option("-S", "--skip-tests", help="skip testing image", is_flag=True) 37 | @click.option("-R", "--reauth", is_flag=True, 38 | help="re-authenticate to registry before pushing") 39 | @click.option("-n", "--no-cache", is_flag=True, 40 | help="Do not use cache when building the image") 41 | @click.option("-b", "--build-arg", multiple=True, 42 | help="Allow to pass build arguments to docker client.") 43 | @click.option("-f", "--file", "filename", default='Dockerfile', 44 | help="Name of the Dockerfile (Default is 'PATH/Dockerfile')") 45 | def cli(target, debug, verbose, version, username, password, email, 46 | apikey, insecure, async_, skip_tests, reauth, no_cache, build_arg, filename): 47 | upload_cmd(target, version, username, password, email, apikey, insecure, 48 | async_, skip_tests, reauth, no_cache, build_arg, filename) 49 | 50 | 51 | def upload_cmd(target, version, username=None, password=None, email=None, 52 | apikey=None, insecure=False, async_=False, skip_tests=False, 53 | reauth=False, no_cache=False, build_arg=(), filename='Dockerfile'): 54 | build.build_cmd(target, version, skip_tests, no_cache, build_arg, filename=filename) 55 | # skip tests for push command anyway because they run in build command if not skipped 56 | push.push_cmd(target, version, username, password, email, apikey, 57 | insecure, skip_tests=True, reauth=reauth) 58 | deploy.deploy_cmd(target, version, username, password, email, 59 | apikey, insecure, async_) 60 | -------------------------------------------------------------------------------- /shub/migrate_eggs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | import errno 5 | 6 | from shub.compat import to_unicode 7 | from urllib.parse import urljoin 8 | 9 | from io import BytesIO 10 | 11 | import click 12 | import requests 13 | 14 | from shub.config import get_target_conf, ShubConfig 15 | 16 | HELP = """ 17 | Migrate eggs stored in Dash's "Code & Deploy" section. 18 | 19 | Eggs that are available in PYPI will be stored in requirements.txt file. 20 | The rest will be stored in user provided directory and send to Dash 21 | for each deployment. 22 | 23 | After the operation is completed, please review changes made to 24 | scrapinghub.yml and requirements.txt files. 
25 | """ 26 | 27 | SHORT_HELP = "Migrate dash eggs to requirements.txt and project's directory" 28 | 29 | 30 | @click.command(help=HELP, short_help=SHORT_HELP) 31 | @click.argument("target", required=False, default='default') 32 | def cli(target): 33 | main(target) 34 | 35 | 36 | def main(target): 37 | targetconf = get_target_conf(target) 38 | 39 | url = urljoin(targetconf.endpoint, 'migrate-eggs.zip') 40 | params = {'project': targetconf.project_id} 41 | auth = (targetconf.apikey, '') 42 | 43 | response = requests.get(url, auth=auth, params=params, stream=True) 44 | 45 | with zipfile.ZipFile(BytesIO(response.content), 'r') as mfile: 46 | Migrator(mfile).start() 47 | 48 | 49 | class Migrator: 50 | def __init__(self, mfile): 51 | self.mfile = mfile 52 | self.sh_yml = './scrapinghub.yml' 53 | self.conf = ShubConfig() 54 | self.conf.load_file(self.sh_yml) 55 | 56 | self.req_content = to_unicode(self.mfile.read('requirements.txt')) 57 | self.eggs = [] 58 | 59 | for filename in self.mfile.namelist(): 60 | if filename.endswith('.egg'): 61 | self.eggs.append(filename) 62 | 63 | def start(self): 64 | if self.eggs: 65 | self.migrate_eggs() 66 | 67 | self.migrate_requirements_txt() 68 | 69 | self.conf.save(self.sh_yml) 70 | 71 | def migrate_eggs(self): 72 | eggsdir = './eggs' 73 | msg = f"Eggs will be stored in {eggsdir}, are you sure ? " 74 | click.confirm(msg) 75 | try: 76 | os.mkdir(eggsdir) 77 | except OSError as e: 78 | if e.errno != errno.EEXIST: 79 | raise 80 | 81 | for filename in self.eggs: 82 | filepath = os.path.join(eggsdir, filename) 83 | if filepath in self.conf.eggs: 84 | continue 85 | 86 | self.conf.eggs.append(filepath) 87 | self.mfile.extract(filename, eggsdir) 88 | 89 | def migrate_requirements_txt(self): 90 | req_file = self.conf.requirements_file or './requirements.txt' 91 | 92 | if os.path.isfile(req_file): 93 | y = click.confirm( 94 | 'requirements.txt already exists, ' 95 | 'are you sure to override it ?' 
96 | ) 97 | if not y: 98 | click.echo('Aborting') 99 | return 100 | 101 | self.conf.requirements_file = req_file 102 | 103 | with open(self.conf.requirements_file, 'w') as reqfile: 104 | reqfile.write(self.req_content) 105 | -------------------------------------------------------------------------------- /tests/image/test_test.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import pytest 4 | from click.testing import CliRunner 5 | 6 | from shub import exceptions as shub_exceptions 7 | from shub.image.test import ( 8 | cli, _run_docker_command, _check_image_size, _check_start_crawl_entry, 9 | IMAGE_SIZE_LIMIT, 10 | ) 11 | 12 | from .utils import FakeProjectDirectory 13 | from .utils import add_sh_fake_config 14 | 15 | 16 | class MockedNotFound(Exception): 17 | """Mocking docker.errors.NotFound""" 18 | 19 | 20 | @pytest.fixture 21 | def docker_client(): 22 | client = mock.Mock() 23 | client.create_container.return_value = {'Id': '12345'} 24 | client.wait.return_value = {'Error': None, 'StatusCode': 0} 25 | client.logs.return_value = 'some-logs' 26 | return client 27 | 28 | 29 | def test_test_cli(monkeypatch, docker_client): 30 | """ This test mocks docker library to test the function itself """ 31 | monkeypatch.setattr('docker.errors.NotFound', MockedNotFound) 32 | monkeypatch.setattr('shub.image.utils.get_docker_client', 33 | lambda *args, **kwargs: docker_client) 34 | with FakeProjectDirectory() as tmpdir: 35 | add_sh_fake_config(tmpdir) 36 | runner = CliRunner() 37 | result = runner.invoke( 38 | cli, ["dev", "-v", "--version", "test"]) 39 | assert result.exit_code == 0 40 | 41 | 42 | def test_check_image_exists(monkeypatch, docker_client): 43 | assert _check_image_size('img', docker_client) is None 44 | 45 | monkeypatch.setattr('docker.errors.NotFound', MockedNotFound) 46 | docker_client.inspect_image.side_effect = MockedNotFound 47 | with pytest.raises(shub_exceptions.NotFoundException): 48 | _check_image_size('image', docker_client) 49 | 50 | 51 | def test_check_image_size(monkeypatch, docker_client): 52 | docker_client.inspect_image.return_value = {'Size': IMAGE_SIZE_LIMIT} 53 | assert _check_image_size('img', docker_client) is None 54 | 55 | docker_client.inspect_image.return_value = {'Size': IMAGE_SIZE_LIMIT + 1} 56 | with pytest.raises(shub_exceptions.CustomImageTooLargeException): 57 | _check_image_size('image', docker_client) 58 | 59 | 60 | def test_start_crawl(docker_client): 61 | assert _check_start_crawl_entry('image', docker_client) is None 62 | docker_client.create_container.assert_called_with( 63 | image='image', command=['which', 'start-crawl']) 64 | docker_client.wait.return_value = {'Error': None, 'StatusCode': 1} 65 | with pytest.raises(shub_exceptions.NotFoundException): 66 | _check_start_crawl_entry('image', docker_client) 67 | 68 | docker_client.wait.return_value = {'Error': None, 'StatusCode': 0} 69 | docker_client.logs.return_value = '' 70 | with pytest.raises(shub_exceptions.NotFoundException): 71 | _check_start_crawl_entry('image', docker_client) 72 | 73 | 74 | def test_run_docker_command(docker_client): 75 | assert _run_docker_command( 76 | docker_client, 'image-name', ['some', 'cmd']) == \ 77 | (0, 'some-logs') 78 | docker_client.create_container.assert_called_with( 79 | image='image-name', command=['some', 'cmd']) 80 | docker_client.start.assert_called_with({'Id': '12345'}) 81 | docker_client.wait.assert_called_with(container='12345') 82 | docker_client.logs.assert_called_with( 83 | 
container='12345', stdout=True, stderr=False, 84 | stream=False, timestamps=False) 85 | docker_client.remove_container.assert_called_with({'Id': '12345'}) 86 | -------------------------------------------------------------------------------- /shub/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | shub-specific exceptions. 3 | 4 | Exit codes follow the sysexits.h convention: 5 | https://www.freebsd.org/cgi/man.cgi?query=sysexits&sektion=3 6 | """ 7 | 8 | 9 | import sys 10 | import warnings 11 | 12 | from click import BadParameter, ClickException 13 | 14 | 15 | class ShubException(ClickException): 16 | def __init__(self, msg=None): 17 | super().__init__(msg or self.default_msg) 18 | 19 | 20 | class MissingAuthException(ShubException): 21 | # EX_NOPERM would be more appropriate here but would forbid distinguishing 22 | # this from InvalidAuth by exit code 23 | exit_code = 67 # EX_NOUSER 24 | default_msg = "Not logged in. Please run 'shub login' first." 25 | 26 | 27 | class InvalidAuthException(ShubException): 28 | exit_code = 77 # EX_NOPERM 29 | default_msg = ("Authentication failure. Please make sure that your API key" 30 | " is valid.") 31 | 32 | 33 | class AlreadyLoggedInException(ShubException): 34 | exit_code = 0 35 | default_msg = ("You are already logged in. To change credentials, use " 36 | "'shub logout' first.") 37 | 38 | 39 | class ConfigParseException(ShubException): 40 | exit_code = 65 # EX_DATAERR 41 | default_msg = "Unable to parse configuration." 42 | 43 | 44 | class BadConfigException(ShubException): 45 | exit_code = 78 # EX_CONFIG 46 | # Should be initialised with more specific message 47 | default_msg = "Please check your scrapinghub.yml." 48 | 49 | 50 | class NotFoundException(ShubException): 51 | # Should be initialised with more specific message 52 | exit_code = 69 # EX_UNAVAILABLE 53 | default_msg = "Not found." 54 | 55 | 56 | class BadParameterException(BadParameter): 57 | exit_code = 64 # EX_USAGE 58 | 59 | 60 | class SubcommandException(ShubException): 61 | exit_code = 65 # EX_DATAERR 62 | default_msg = "Error while calling subcommand." 63 | 64 | 65 | class RemoteErrorException(ShubException): 66 | exit_code = 76 # EX_PROTOCOL 67 | # Should be initialised with more specific message 68 | default_msg = "Remote error." 69 | 70 | 71 | class DeployRequestTooLargeException(ShubException): 72 | exit_code = 65 # EX_DATAERR 73 | default_msg = ("Deploy request is too large. Please make sure that your " 74 | "project egg(s) size is less than 50MB in total.") 75 | 76 | 77 | class CustomImageTooLargeException(ShubException): 78 | exit_code = 65 # EX_DATAERR 79 | default_msg = ("Custom Docker image is too large. 
Please make sure that " 80 | "your image size is less than 3GB.") 81 | 82 | 83 | class ShubWarning(Warning): 84 | """Base class for custom warnings.""" 85 | 86 | 87 | class ShubDeprecationWarning(ShubWarning): 88 | """Warning category for deprecated features, since the default 89 | DeprecationWarning is silenced on Python 2.7+ 90 | """ 91 | 92 | 93 | def print_warning(msg, category=ShubWarning): 94 | """Helper to use Python warnings with custom formatter.""" 95 | 96 | def custom_showwarning(message, *args, **kwargs): 97 | # ignore everything except the message 98 | try: 99 | sys.stderr.write("WARNING: " + str(message) + '\n') 100 | # stderr is invalid - this warning just gets lost 101 | except (OSError, UnicodeError): 102 | pass 103 | 104 | old_showwarning = warnings.showwarning 105 | try: 106 | warnings.showwarning = custom_showwarning 107 | warnings.warn(msg, category=category) 108 | finally: 109 | warnings.showwarning = old_showwarning 110 | -------------------------------------------------------------------------------- /tests/test_jobresource.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import time 3 | import json 4 | from unittest import mock 5 | 6 | from click.testing import CliRunner 7 | 8 | from shub import items, log, requests 9 | 10 | 11 | class JobResourceTest(unittest.TestCase): 12 | 13 | def setUp(self): 14 | self.runner = CliRunner() 15 | 16 | def _test_prints_objects(self, cmd_mod, resource_name): 17 | objects = ['Object 1', 'Object 2'] 18 | jobid = '1/2/3' 19 | with mock.patch.object(cmd_mod, 'get_job', autospec=True) as mock_gj: 20 | # Patch job.items.iter_json() to return our objects 21 | mock_gj.return_value._metadata_updated = time.time() 22 | mock_resource = getattr(mock_gj.return_value, resource_name) 23 | mock_resource.iter_json.return_value = objects 24 | result = self.runner.invoke(cmd_mod.cli, (jobid,)) 25 | mock_gj.assert_called_once_with(jobid) 26 | self.assertIn("\n".join(objects), result.output) 27 | 28 | def _test_forwards_follow(self, cmd_mod): 29 | with mock.patch.object(cmd_mod, 'get_job'), \ 30 | mock.patch.object(cmd_mod, 'job_resource_iter', autospec=True) \ 31 | as mock_jri: 32 | self.runner.invoke(cmd_mod.cli, ('1/2/3',)) 33 | self.assertFalse(mock_jri.call_args[1]['follow']) 34 | self.runner.invoke(cmd_mod.cli, ('1/2/3', '-f')) 35 | self.assertTrue(mock_jri.call_args[1]['follow']) 36 | 37 | def test_items(self): 38 | self._test_prints_objects(items, 'items') 39 | self._test_forwards_follow(items) 40 | 41 | def test_requests(self): 42 | self._test_prints_objects(requests, 'requests') 43 | self._test_forwards_follow(requests) 44 | 45 | def test_log(self): 46 | objects = [ 47 | {'time': 0, 'level': 20, 'message': 'message 1'}, 48 | {'time': 1450874471000, 'level': 50, 'message': 'message 2'}, 49 | ] 50 | jobid = '1/2/3' 51 | with mock.patch.object(log, 'get_job', autospec=True) as mock_gj: 52 | mock_gj.return_value._metadata_updated = time.time() 53 | mock_gj.return_value.logs.iter_values.return_value = objects 54 | result = self.runner.invoke(log.cli, (jobid,)) 55 | mock_gj.assert_called_once_with(jobid) 56 | self.assertIn('1970-01-01 00:00:00 INFO message 1', result.output) 57 | self.assertIn('2015-12-23 12:41:11 CRITICAL message 2', result.output) 58 | with mock.patch.object(log, 'get_job', autospec=True) as mock_gj: 59 | with mock.patch.object(log, 'job_resource_iter', autospec=True) as mock_res_iter: 60 | mock_res_iter.return_value = [json.dumps(x) for x in objects] 61 | result = 
self.runner.invoke(log.cli, (jobid, '--json')) 62 | self.assertTrue(mock_res_iter.call_args[1].get('output_json')) 63 | for idx, line in enumerate(result.output.splitlines()): 64 | self.assertEqual(json.loads(line), objects[idx]) 65 | self._test_forwards_follow(log) 66 | 67 | def test_log_unicode(self): 68 | objects = [ 69 | {'time': 0, 'level': 20, 'message': 'jarzębina'} 70 | ] 71 | jobid = '1/2/3' 72 | with mock.patch.object(log, 'get_job', autospec=True) as mock_gj: 73 | mock_gj.return_value._metadata_updated = time.time() 74 | mock_gj.return_value.logs.iter_values.return_value = objects 75 | result = self.runner.invoke(log.cli, (jobid,)) 76 | mock_gj.assert_called_once_with(jobid) 77 | self.assertIn('1970-01-01 00:00:00 INFO jarzębina', result.output) 78 | -------------------------------------------------------------------------------- /tests/test_cancel.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from collections import namedtuple 3 | from unittest import mock 4 | 5 | from click.testing import CliRunner 6 | 7 | from shub import cancel 8 | from shub.exceptions import ( 9 | BadParameterException, 10 | ShubException, 11 | ) 12 | 13 | from .utils import AssertInvokeRaisesMixin, mock_conf 14 | 15 | 16 | class CancelTest(AssertInvokeRaisesMixin, unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.runner = CliRunner() 20 | self.conf = mock_conf(self) 21 | 22 | @mock.patch('shub.cancel.get_scrapinghub_client_from_config') 23 | def test_simple_cancel_call(self, mock_client): 24 | client = mock_client.return_value 25 | mock_proj = client.get_project.return_value 26 | mock_proj.jobs.cancel.return_value = {'count': 2} 27 | 28 | result = self.runner.invoke( 29 | cancel.cli, ('123456', '1/1', '1/2',), input='y\n' 30 | ) 31 | 32 | self.assertTrue("{'count': 2}" in result.output) 33 | self.assertEqual(0, result.exit_code) 34 | self.assertEqual(mock_proj.jobs.cancel.call_count, 1) 35 | mock_proj.jobs.cancel.assert_called_with( 36 | keys=['123456/1/1', '123456/1/2'] 37 | ) 38 | 39 | @mock.patch('shub.cancel.get_target_conf') 40 | @mock.patch('shub.cancel.get_scrapinghub_client_from_config') 41 | def test_cancel_default_project(self, mock_client, targetconf): 42 | client = mock_client.return_value 43 | mock_proj = client.get_project.return_value 44 | mock_proj.jobs.cancel.return_value = {'count': 2} 45 | 46 | Target = namedtuple('Target', 'project_id') 47 | targetconf.return_value = Target(project_id='123456') 48 | 49 | result = self.runner.invoke( 50 | cancel.cli, ('1/1', '1/2',), input='y\n' 51 | ) 52 | 53 | self.assertTrue("{'count': 2}" in result.output) 54 | self.assertEqual(0, result.exit_code) 55 | self.assertEqual(mock_proj.jobs.cancel.call_count, 1) 56 | mock_proj.jobs.cancel.assert_called_with( 57 | keys=['123456/1/1', '123456/1/2'] 58 | ) 59 | 60 | @mock.patch('shub.cancel.get_scrapinghub_client_from_config') 61 | def test_invalid_job_key(self, mock_client): 62 | self.assertInvokeRaises( 63 | SystemExit, 64 | cancel.cli, 65 | ('123456', '1/1', '1',), 66 | input='y\n' 67 | ) 68 | 69 | self.assertInvokeRaises( 70 | SystemExit, 71 | cancel.cli, 72 | ('123456', '1/abc', '1',), 73 | input='y\n' 74 | ) 75 | 76 | @mock.patch('shub.cancel.get_scrapinghub_client_from_config') 77 | def test_cancel_failure(self, mock_client): 78 | client = mock_client.return_value 79 | mock_proj = client.get_project.return_value 80 | mock_proj.jobs.cancel.side_effect = ValueError('Error msg') 81 | 82 | self.assertInvokeRaises( 83 | ShubException, 84 
| cancel.cli, 85 | ('123456', '1/1', '1/2',), 86 | input='y\n', 87 | ) 88 | 89 | @mock.patch('shub.cancel.get_scrapinghub_client_from_config') 90 | def test_cancel_abort(self, mock_client): 91 | client = mock_client.return_value 92 | client.get_project.return_value 93 | 94 | result = self.runner.invoke( 95 | cancel.cli, ('123456', '1/1', '1/2',), input='N\n', 96 | ) 97 | self.assertTrue('Aborted!' in result.output) 98 | 99 | def test_validate_job_key(self): 100 | with self.assertRaises(BadParameterException): 101 | cancel.validate_job_key('123456', '1') 102 | 103 | with self.assertRaises(BadParameterException): 104 | cancel.validate_job_key('123456', '1/abc') 105 | 106 | with self.assertRaises(BadParameterException): 107 | cancel.validate_job_key('123456', '') 108 | -------------------------------------------------------------------------------- /tests/test_login.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import textwrap 3 | from unittest.mock import patch, MagicMock 4 | 5 | import yaml 6 | from click.testing import CliRunner 7 | from yaml import CLoader as Loader 8 | 9 | from shub import login 10 | from shub.exceptions import AlreadyLoggedInException 11 | 12 | from .utils import AssertInvokeRaisesMixin 13 | 14 | 15 | VALID_KEY = 32 * '1' 16 | 17 | 18 | @patch('shub.login.GLOBAL_SCRAPINGHUB_YML_PATH', new='.scrapinghub.yml') 19 | @patch('shub.config.GLOBAL_SCRAPINGHUB_YML_PATH', new='.scrapinghub.yml') 20 | @patch('shub.config.NETRC_PATH', new='.netrc') 21 | @patch('shub.config.get_sources', new=MagicMock(return_value=[])) 22 | class LoginTest(AssertInvokeRaisesMixin, unittest.TestCase): 23 | 24 | def setUp(self): 25 | self.runner = CliRunner() 26 | 27 | def _run(self, user_input=VALID_KEY, files=None, fs=None, **kwargs): 28 | """Invokes the login cli on an isolated filesystem""" 29 | 30 | def write_local_test_files(): 31 | for path, content in (files or {}).items(): 32 | with open(path, 'w') as f: 33 | f.write(content) 34 | 35 | def invoke(): 36 | return self.runner.invoke(login.cli, input=user_input, **kwargs) 37 | 38 | def run(): 39 | write_local_test_files() 40 | with patch.object(login, '_is_valid_apikey', return_value=True): 41 | return invoke() 42 | 43 | if fs: 44 | return run() 45 | 46 | with self.runner.isolated_filesystem() as fs: 47 | return run() 48 | 49 | def test_write_key_to_new_file(self): 50 | with self.runner.isolated_filesystem() as fs: 51 | self._run(fs=fs) 52 | with open('.scrapinghub.yml') as f: 53 | conf = yaml.load(f, Loader=Loader) 54 | self.assertEqual(conf['apikeys']['default'], VALID_KEY) 55 | 56 | def test_write_key_to_existing_file(self): 57 | VALID_SCRAPINGHUB_YML = textwrap.dedent(""" 58 | endpoints: 59 | other: some_endpoint 60 | """) 61 | with self.runner.isolated_filesystem() as fs: 62 | files = {'.scrapinghub.yml': VALID_SCRAPINGHUB_YML} 63 | self._run(files=files, fs=fs) 64 | with open('.scrapinghub.yml') as f: 65 | conf = yaml.load(f, Loader=Loader) 66 | self.assertEqual(conf['apikeys']['default'], VALID_KEY) 67 | self.assertEqual(conf['endpoints']['other'], "some_endpoint") 68 | 69 | def test_suggest_project_key(self): 70 | PROJECT_SH_YML = textwrap.dedent(""" 71 | apikeys: 72 | default: KEY_SUGGESTION 73 | """) 74 | files = {'scrapinghub.yml': PROJECT_SH_YML} 75 | result = self._run(files=files) 76 | err = 'Unexpected output: %s' % result.output 77 | self.assertTrue('KEY_SUGGESTION' in result.output, err) 78 | 79 | def test_suggest_env_key(self): 80 | result = 
self._run(env={'SHUB_APIKEY': 'SHUB_APIKEY_VALUE'}) 81 | err = 'Unexpected output: %s' % result.output 82 | self.assertTrue('SHUB_APIKEY_VALUE' in result.output, err) 83 | 84 | def test_use_suggestion_to_log_in(self): 85 | apikey_suggestion = 'SHUB_APIKEY_VALUE' 86 | with self.runner.isolated_filesystem() as fs: 87 | self._run( 88 | env={'SHUB_APIKEY': apikey_suggestion}, 89 | user_input='\n', 90 | fs=fs, 91 | ) 92 | with open('.scrapinghub.yml') as f: 93 | conf = yaml.load(f, Loader=Loader) 94 | self.assertEqual(conf['apikeys']['default'], apikey_suggestion) 95 | 96 | def test_login_attempt_after_login_doesnt_lead_to_an_error(self): 97 | with self.runner.isolated_filesystem() as fs: 98 | self._run(fs=fs) 99 | self.assertInvokeRaises(AlreadyLoggedInException, login.cli, 100 | input=VALID_KEY) 101 | -------------------------------------------------------------------------------- /tests/test_bootstrap.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | from unittest import mock 4 | 5 | import pytest 6 | import requests 7 | import yaml 8 | from click.testing import CliRunner 9 | 10 | from shub.bootstrap import cli, EXAMPLE_REPO, list_projects, unzip_project 11 | from shub.exceptions import ( 12 | BadParameterException, NotFoundException, RemoteErrorException) 13 | 14 | 15 | BOOTSTRAP_PROJECTS = """ 16 | projA: 17 | path: projects_dir/projA 18 | description: PROJECT_A_DESC 19 | 20 | projB: 21 | path: projects_dir/projB 22 | description: PROJECT_B_DESC 23 | """ 24 | 25 | 26 | REPO_ZIP_PATH = os.path.join(os.path.dirname(__file__), 'samples', 27 | 'custom-images-examples-master.zip') 28 | 29 | 30 | @pytest.fixture 31 | def requests_get_mock(): 32 | with mock.patch('shub.bootstrap.requests.get') as m: 33 | yield m 34 | 35 | 36 | @pytest.fixture 37 | def github_responses(requests_get_mock): 38 | requests_get_mock.return_value.text = BOOTSTRAP_PROJECTS 39 | with open(REPO_ZIP_PATH, 'rb') as f: 40 | requests_get_mock.return_value.content = f.read() 41 | 42 | 43 | def test_list_projects(capsys): 44 | projects = yaml.safe_load(BOOTSTRAP_PROJECTS) 45 | list_projects(projects) 46 | out, _ = capsys.readouterr() 47 | assert 'projA' in out 48 | assert 'PROJECT_A_DESC' in out 49 | assert 'projB' in out 50 | assert 'PROJECT_B_DESC' in out 51 | assert 'projects_dir' not in out 52 | 53 | 54 | def test_unzip_project(tempdir): 55 | target_dir = str(tempdir.join('projA')) 56 | project = {'path': 'projects_dir/projA'} 57 | repo_zip = zipfile.ZipFile(REPO_ZIP_PATH) 58 | assert not os.path.exists(target_dir) 59 | unzip_project(repo_zip, project, target_dir) 60 | assert os.path.exists(target_dir) 61 | assert os.path.isfile(os.path.join(target_dir, 'a_file')) 62 | assert os.path.isdir(os.path.join(target_dir, 'a_dir')) 63 | assert os.path.isfile(os.path.join(target_dir, 'a_dir', 'a_dir_file')) 64 | 65 | 66 | @pytest.mark.usefixtures('github_responses') 67 | def test_cli_lists_projects(): 68 | result = CliRunner().invoke(cli, ['-l']) 69 | assert result.exit_code == 0 70 | assert 'projA' in result.output 71 | assert 'PROJECT_A_DESC' in result.output 72 | 73 | 74 | @pytest.mark.usefixtures('github_responses') 75 | def test_cli_clones_project_into_default_dir(tempdir): 76 | target_dir = str(tempdir.join('projA')) 77 | assert not os.path.exists(target_dir) 78 | result = CliRunner().invoke(cli, ['projA']) 79 | assert result.exit_code == 0 80 | assert os.path.isdir(target_dir) 81 | assert os.path.isfile(os.path.join(target_dir, 'a_file')) 82 | 83 | 84 | 
@pytest.mark.usefixtures('github_responses') 85 | def test_cli_clones_project_into_target_dir(tempdir): 86 | target_dir = str(tempdir.join('target_dir')) 87 | assert not os.path.exists(target_dir) 88 | result = CliRunner().invoke(cli, ['projA', 'target_dir']) 89 | assert result.exit_code == 0 90 | assert os.path.isdir(target_dir) 91 | assert os.path.isfile(os.path.join(target_dir, 'a_file')) 92 | 93 | 94 | def test_cli_fails_on_existing_target_dir(tempdir): 95 | os.mkdir('target_dir') 96 | result = CliRunner().invoke(cli, ['some_project', 'target_dir']) 97 | assert result.exit_code == BadParameterException.exit_code 98 | assert "exists" in result.output 99 | 100 | 101 | @pytest.mark.usefixtures('github_responses') 102 | def test_cli_fails_on_unknown_project(): 103 | result = CliRunner().invoke(cli, ['nonexistent']) 104 | assert result.exit_code == NotFoundException.exit_code 105 | assert "shub bootstrap -l" in result.output 106 | 107 | 108 | def test_cli_links_to_repo_on_http_error(requests_get_mock): 109 | requests_get_mock.return_value.raise_for_status.side_effect = ( 110 | requests.HTTPError) 111 | result = CliRunner().invoke(cli, ['some_project']) 112 | assert result.exit_code == RemoteErrorException.exit_code 113 | assert EXAMPLE_REPO in result.output 114 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | tests-ubuntu: 13 | name: "Test: py${{ matrix.python-version }}, Ubuntu" 14 | runs-on: ${{ matrix.os || 'ubuntu-latest' }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | include: 19 | - python-version: '3.9' 20 | os: ubuntu-22.04 21 | tox-env: min 22 | - python-version: '3.9' 23 | tox-env: min-poetry 24 | os: ubuntu-22.04 25 | - python-version: '3.9' 26 | - python-version: '3.10' 27 | - python-version: '3.11' 28 | - python-version: '3.12' 29 | - python-version: '3.13' 30 | - python-version: '3.14' 31 | - python-version: '3.14' 32 | tox-env: poetry 33 | 34 | steps: 35 | - uses: actions/checkout@v2 36 | 37 | - name: Set up Python ${{ matrix.python-version }} 38 | uses: actions/setup-python@v5 39 | with: 40 | python-version: ${{ matrix.python-version }} 41 | 42 | - name: Install tox 43 | run: pip install tox 44 | 45 | - name: Run tests 46 | run: tox -e ${{ matrix.tox-env || 'py' }} 47 | 48 | - name: Run off-tox tests 49 | # https://github.com/scrapinghub/shub/issues/441 50 | run: | 51 | python -m venv venv 52 | . venv/bin/activate 53 | pip install . 
54 | python -c "from shub.image.utils import get_docker_client; get_docker_client(validate=False)" 55 | 56 | - name: coverage 57 | uses: codecov/codecov-action@v5 58 | with: 59 | token: ${{ secrets.CODECOV_TOKEN }} 60 | 61 | tests-macos: 62 | name: "Test: py${{ matrix.python-version }}, macOS" 63 | runs-on: ${{ matrix.os || 'macos-latest' }} 64 | strategy: 65 | fail-fast: false 66 | matrix: 67 | include: 68 | - python-version: '3.9' 69 | tox-env: min 70 | os: macos-13 71 | - python-version: '3.9' 72 | tox-env: min-poetry 73 | os: macos-13 74 | - python-version: '3.9' 75 | - python-version: '3.10' 76 | - python-version: '3.11' 77 | - python-version: '3.12' 78 | - python-version: '3.13' 79 | - python-version: '3.14' 80 | - python-version: '3.14' 81 | tox-env: poetry 82 | 83 | steps: 84 | - uses: actions/checkout@v2 85 | 86 | - name: Set up Python ${{ matrix.python-version }} 87 | uses: actions/setup-python@v5 88 | with: 89 | python-version: ${{ matrix.python-version }} 90 | 91 | - name: Install tox 92 | run: pip install tox 93 | 94 | - name: Run tests 95 | run: tox -e ${{ matrix.tox-env || 'py' }} 96 | 97 | - name: coverage 98 | uses: codecov/codecov-action@v5 99 | with: 100 | token: ${{ secrets.CODECOV_TOKEN }} 101 | 102 | tests-windows: 103 | name: "Test: py${{ matrix.python-version }}, Windows" 104 | runs-on: windows-latest 105 | strategy: 106 | fail-fast: false 107 | matrix: 108 | include: 109 | - python-version: '3.9' 110 | tox-env: min 111 | - python-version: '3.9' 112 | tox-env: min-poetry 113 | - python-version: '3.9' 114 | - python-version: '3.10' 115 | - python-version: '3.11' 116 | - python-version: '3.12' 117 | - python-version: '3.13' 118 | - python-version: '3.14' 119 | - python-version: '3.14' 120 | tox-env: poetry 121 | 122 | steps: 123 | - uses: actions/checkout@v2 124 | 125 | - name: Set up Python ${{ matrix.python-version }} 126 | uses: actions/setup-python@v5 127 | with: 128 | python-version: ${{ matrix.python-version }} 129 | 130 | - name: Install tox 131 | run: pip install tox 132 | 133 | - name: Run tests 134 | run: tox -e ${{ matrix.tox-env || 'py' }} 135 | 136 | - name: coverage 137 | uses: codecov/codecov-action@v5 138 | with: 139 | token: ${{ secrets.CODECOV_TOKEN }} 140 | -------------------------------------------------------------------------------- /shub/schedule.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import click 4 | from scrapinghub import ScrapinghubClient, ScrapinghubAPIError 5 | from urllib.parse import urljoin 6 | 7 | from shub.exceptions import RemoteErrorException 8 | from shub.config import get_target_conf 9 | 10 | 11 | HELP = """ 12 | Schedule a spider to run on Scrapy Cloud, optionally with provided spider 13 | arguments and job-specific settings. 14 | 15 | The `spider` argument should match the spider's name, e.g.: 16 | 17 | shub schedule myspider 18 | 19 | By default, shub will schedule the spider in your default project (as defined 20 | in scrapinghub.yml). 
You may also explicitly specify the project to use by 21 | supplying its ID: 22 | 23 | shub schedule 12345/myspider 24 | 25 | Or by supplying an identifier defined in scrapinghub.yml: 26 | 27 | shub schedule production/myspider 28 | 29 | Spider arguments can be supplied through the -a option: 30 | 31 | shub schedule myspider -a ARG1=VALUE1 -a ARG2=VALUE2 32 | 33 | Similarly, job-specific settings can be supplied through the -s option: 34 | 35 | shub schedule myspider -s SETTING=VALUE -s LOG_LEVEL=DEBUG 36 | """ 37 | 38 | SHORT_HELP = "Schedule a spider to run on Scrapy Cloud" 39 | DEFAULT_PRIORITY = 2 40 | 41 | 42 | @click.command(help=HELP, short_help=SHORT_HELP) 43 | @click.argument('spider', type=click.STRING) 44 | @click.option('-a', '--argument', 45 | help='Spider argument (-a name=value)', multiple=True) 46 | @click.option('-s', '--set', 47 | help='Job-specific setting (-s name=value)', multiple=True) 48 | @click.option('-p', '--priority', type=int, default=DEFAULT_PRIORITY, 49 | help='Job priority (-p number). From 0 (lowest) to 4 (highest)') 50 | @click.option('-e', '--environment', multiple=True, 51 | help='Job environment variable (-e VAR=VAL)') 52 | @click.option('-u', '--units', type=int, 53 | help='Amount of Scrapy Cloud units (-u number)') 54 | @click.option('-t', '--tag', 55 | help='Job tags (-t tag)', multiple=True) 56 | def cli(spider, argument, set, environment, priority, units, tag): 57 | try: 58 | target, spider = spider.rsplit('/', 1) 59 | except ValueError: 60 | target = 'default' 61 | targetconf = get_target_conf(target) 62 | job_key = schedule_spider(targetconf.project_id, targetconf.endpoint, 63 | targetconf.apikey, spider, argument, set, 64 | priority, units, tag, environment) 65 | watch_url = urljoin( 66 | targetconf.endpoint, 67 | '../p/{}/{}/{}'.format(*job_key.split('/')), 68 | ) 69 | short_key = job_key.split('/', 1)[1] if target == 'default' else job_key 70 | click.echo(f"Spider {spider} scheduled, job ID: {job_key}") 71 | click.echo("Watch the log on the command line:\n shub log -f {}" 72 | "".format(short_key)) 73 | click.echo("or print items as they are being scraped:\n shub items -f " 74 | "{}".format(short_key)) 75 | click.echo("or watch it running in Zyte's web interface:\n {}" 76 | "".format(watch_url)) 77 | 78 | 79 | def schedule_spider(project, endpoint, apikey, spider, arguments=(), settings=(), 80 | priority=DEFAULT_PRIORITY, units=None, tag=(), environment=()): 81 | client = ScrapinghubClient(apikey, dash_endpoint=endpoint) 82 | try: 83 | project = client.get_project(project) 84 | args = dict(x.split('=', 1) for x in arguments) 85 | cmd_args = args.pop('cmd_args', None) 86 | meta = args.pop('meta', None) 87 | job = project.jobs.run( 88 | spider=spider, 89 | meta=json.loads(meta) if meta else {}, 90 | cmd_args=cmd_args, 91 | job_args=args, 92 | job_settings=dict(x.split('=', 1) for x in settings), 93 | priority=priority, 94 | units=units, 95 | add_tag=tag, 96 | environment=dict(x.split('=', 1) for x in environment), 97 | ) 98 | return job.key 99 | except ScrapinghubAPIError as e: 100 | raise RemoteErrorException(str(e)) 101 | -------------------------------------------------------------------------------- /tests/test_deploy_egg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | import unittest 5 | from unittest import mock 6 | from zipfile import ZipFile 7 | 8 | from shub import deploy_egg 9 | from shub.exceptions import BadParameterException 10 | 11 | 12 | 
class FakeRequester: 13 | """Used to mock shub.utils#make_deploy_request""" 14 | def fake_request(self, *args): 15 | self.url = args[0] 16 | self.data = args[1] 17 | self.files = args[2] 18 | self.auth = args[3] 19 | 20 | 21 | @mock.patch.dict(os.environ, {'SHUB_APIKEY': '1234'}) 22 | class TestDeployEgg(unittest.TestCase): 23 | 24 | def setUp(self): 25 | self.curdir = os.getcwd() 26 | self.fake_requester = FakeRequester() 27 | deploy_egg.utils.make_deploy_request = self.fake_requester.fake_request 28 | self.tmp_dir = tempfile.mkdtemp(prefix="shub-test-deploy-eggs") 29 | 30 | def tearDown(self): 31 | os.chdir(self.curdir) 32 | if os.path.exists(self.tmp_dir): 33 | shutil.rmtree(self.tmp_dir) 34 | 35 | def test_parses_project_information_correctly(self): 36 | # this test's assertions are based on the values 37 | # defined on this folder's setup.py file 38 | shutil.rmtree(self.tmp_dir) 39 | shutil.copytree('tests/samples/deploy_egg_sample_project', self.tmp_dir) 40 | os.chdir(self.tmp_dir) 41 | 42 | data = self.call_main_and_check_request_data() 43 | self.assertEqual('1.2.0', data['version']) 44 | 45 | def test_can_clone_a_git_repo_and_deploy_the_egg(self): 46 | self._unzip_git_repo_to(self.tmp_dir) 47 | repo = os.path.join(self.tmp_dir, 'deploy_egg_sample_repo.git') 48 | 49 | self.call_main_and_check_request_data(from_url=repo) 50 | data = self.call_main_and_check_request_data() 51 | 52 | self.assertTrue('master' in data['version']) 53 | 54 | @unittest.skip('flaky') 55 | def test_can_deploy_an_egg_from_pypi(self): 56 | basepath = os.path.abspath('tests/samples/') 57 | pkg = os.path.join(basepath, 'deploy_egg_sample_project.zip') 58 | self.call_main_and_check_request_data(from_pypi=pkg) 59 | 60 | def test_can_clone_checkout_and_deploy_the_egg(self): 61 | self._unzip_git_repo_to(self.tmp_dir) 62 | repo = os.path.join(self.tmp_dir, 'deploy_egg_sample_repo.git') 63 | 64 | branch = 'dev' 65 | data = self.call_main_and_check_request_data(from_url=repo, git_branch=branch) 66 | self.assertTrue('dev' in data['version']) 67 | 68 | def test_fails_on_invalid_repo(self): 69 | self._unzip_git_repo_to(self.tmp_dir) 70 | repo = os.path.join(self.tmp_dir, 'deploy_egg_sample_repo.git') 71 | shutil.rmtree(os.path.join(repo, '.git')) 72 | 73 | with self.assertRaises(BadParameterException): 74 | self.call_main_and_check_request_data(from_url=repo) 75 | 76 | def test_fails_on_invalid_branch(self): 77 | self._unzip_git_repo_to(self.tmp_dir) 78 | repo = os.path.join(self.tmp_dir, 'deploy_egg_sample_repo.git') 79 | with self.assertRaises(BadParameterException): 80 | self.call_main_and_check_request_data( 81 | from_url=repo, git_branch='nonexisting') 82 | 83 | def _unzip_git_repo_to(self, path): 84 | zipped_repo = os.path.abspath('tests/samples/deploy_egg_sample_repo.git.zip') 85 | ZipFile(zipped_repo).extractall(path) 86 | 87 | def call_main_and_check_request_data(self, project_id=0, from_url=None, 88 | git_branch=None, from_pypi=None): 89 | # WHEN 90 | deploy_egg.main(project_id, from_url, git_branch, from_pypi) 91 | 92 | data = self.fake_requester.data 93 | files = self.fake_requester.files 94 | 95 | # THEN 96 | # the egg was successfully built, let's check the data 97 | # that is sent to the scrapy cloud 98 | self.assertTrue('test_project', files['egg'][0]) 99 | self.assertEqual(project_id, data['project']) 100 | self.assertEqual('test_project', data['name']) 101 | 102 | return data 103 | -------------------------------------------------------------------------------- /shub/bootstrap.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | import zipfile 5 | 6 | import click 7 | import requests 8 | import yaml 9 | from click.formatting import HelpFormatter 10 | from io import BytesIO 11 | 12 | from shub.exceptions import ( 13 | BadParameterException, NotFoundException, RemoteErrorException) 14 | 15 | 16 | EXAMPLE_REPO = "scrapinghub/custom-images-examples" 17 | AVAILABLE_PROJECTS_URL = ( 18 | "https://raw.githubusercontent.com/%s/master/bootstrap_projects.yml" 19 | "" % EXAMPLE_REPO) 20 | 21 | HELP = """ 22 | Through custom images, Scrapinghub allows you to run crawlers written in any 23 | language you want. To get you started, we prepared a few examples projects in 24 | different programming languages and frameworks. You can find them in our custom 25 | images repository at: 26 | 27 | https://github.com/scrapinghub/custom-images-examples 28 | 29 | The 'shub bootstrap' command clones an example project to the current directory 30 | so that you can start hacking right away. 31 | 32 | Run 33 | 34 | shub bootstrap -l 35 | 36 | to get a list of all available example projects, then run 37 | 38 | shub bootstrap PROJECTNAME 39 | 40 | to clone it. 41 | """ 42 | 43 | SHORT_HELP = "Clone custom image example project" 44 | 45 | 46 | def list_projects_callback(ctx, param, value): 47 | if not value or ctx.resilient_parsing: 48 | return 49 | projects = get_available_projects() 50 | list_projects(projects) 51 | ctx.exit() 52 | 53 | 54 | @click.command(help=HELP, short_help=SHORT_HELP) 55 | @click.option('-l', '--list', 'list_projects', help='list available projects', 56 | is_flag=True, callback=list_projects_callback, 57 | expose_value=False, is_eager=True) 58 | @click.argument('project') 59 | @click.argument('target_dir', required=False) 60 | def cli(project, target_dir): 61 | target_dir = os.path.normpath( 62 | os.path.join(os.getcwd(), target_dir or project)) 63 | if os.path.exists(target_dir): 64 | raise BadParameterException( 65 | "Target directory %s already exists, please delete it or supply a " 66 | "non-existing target." % target_dir) 67 | projects = get_available_projects() 68 | if project not in projects: 69 | raise NotFoundException( 70 | "There is no example project named '%s'. Run 'shub bootstrap -l' " 71 | "to get a list of all available projects." % project) 72 | click.echo("Downloading custom image examples") 73 | repo_zip = get_repo_zip(EXAMPLE_REPO) 74 | click.echo(f"Cloning project '{project}' into {target_dir}") 75 | unzip_project(repo_zip, project=projects[project], target_dir=target_dir) 76 | 77 | 78 | def get_available_projects(): 79 | try: 80 | resp = requests.get(AVAILABLE_PROJECTS_URL) 81 | resp.raise_for_status() 82 | except (requests.HTTPError, requests.ConnectionError) as e: 83 | raise RemoteErrorException( 84 | "There was an error while getting the list of available projects " 85 | "from GitHub: %s.\n\nPlease check your connection or go to\n %s\n" 86 | "to browse the custom image examples manually." 
87 | "" % (e, "https://github.com/%s" % EXAMPLE_REPO)) 88 | return yaml.safe_load(resp.text) 89 | 90 | 91 | def list_projects(projects): 92 | formatter = HelpFormatter() 93 | with formatter.section("Available projects"): 94 | formatter.write_dl( 95 | sorted( 96 | [(name, info['description']) 97 | for name, info in projects.items()], 98 | key=lambda x: x[0])) 99 | click.echo(formatter.getvalue().strip()) 100 | 101 | 102 | def get_repo_zip(repo): 103 | zip_url = "https://github.com/%s/archive/master.zip" % repo 104 | resp = requests.get(zip_url) 105 | return zipfile.ZipFile(BytesIO(resp.content)) 106 | 107 | 108 | def unzip_project(repo_zip, project, target_dir): 109 | filenames = repo_zip.namelist() 110 | repo_dirname = filenames[0] 111 | project_filenames = [ 112 | fn 113 | for fn in filenames 114 | if fn.startswith(repo_dirname + project['path']) 115 | ] 116 | tempdir = tempfile.mkdtemp() 117 | repo_zip.extractall(path=tempdir, members=project_filenames) 118 | shutil.move( 119 | os.path.join(tempdir, repo_dirname, project['path']), 120 | target_dir, 121 | ) 122 | shutil.rmtree(tempdir) 123 | -------------------------------------------------------------------------------- /shub/deploy_egg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from shutil import which 4 | 5 | import click 6 | 7 | from shub import utils, DEPLOY_DOCS_LINK 8 | from shub.config import get_target_conf 9 | from shub.exceptions import (BadParameterException, NotFoundException, 10 | SubcommandException) 11 | from shub.utils import (decompress_egg_files, download_from_pypi, 12 | run_cmd) 13 | 14 | 15 | HELP = """ 16 | Build a Python egg from source and deploy it to Scrapy Cloud. 17 | 18 | You can either deploy to your default target (as defined in scrapinghub.yml), 19 | or explicitly supply a numerical project ID or a target defined in 20 | scrapinghub.yml (see shub deploy). 21 | 22 | By default, shub will try to build the egg using the current folder's setup.py. 23 | You can also build the egg from a remote (git/mercurial/bazaar) repository by 24 | using the --from-url option: 25 | 26 | shub deploy-egg --from-url https://github.com/scrapinghub/shub.git 27 | 28 | For git repositories, you may additionally specify the branch to be checked 29 | out: 30 | 31 | shub deploy-egg --from-url https://xy.git --git-branch my-feature 32 | 33 | Alternatively, you can build the egg from a PyPI package: 34 | 35 | shub deploy-egg --from-pypi shub 36 | """ 37 | 38 | SHORT_HELP = "[DEPRECATED] Build and deploy egg from source" 39 | 40 | 41 | @click.command(help=HELP, short_help=SHORT_HELP) 42 | @click.argument("target", required=False, default='default') 43 | @click.option("--from-url", help="Git, bazaar or mercurial repository URL") 44 | @click.option("--git-branch", help="Git branch to checkout") 45 | @click.option("--from-pypi", help="Name of package on pypi") 46 | def cli(target, from_url=None, git_branch=None, from_pypi=None): 47 | click.secho( 48 | "deploy-egg was deprecated, define the eggs you would like to deploy " 49 | "in your scrapinghub.yml instead. 
See {}".format(DEPLOY_DOCS_LINK), 50 | err=True, fg='yellow', 51 | ) 52 | main(target, from_url, git_branch, from_pypi) 53 | 54 | 55 | def main(target, from_url=None, git_branch=None, from_pypi=None): 56 | targetconf = get_target_conf(target) 57 | 58 | if from_pypi: 59 | _fetch_from_pypi(from_pypi) 60 | decompress_egg_files() 61 | utils.build_and_deploy_eggs(targetconf.project_id, targetconf.endpoint, 62 | targetconf.apikey) 63 | return 64 | 65 | if from_url: 66 | _checkout(from_url, git_branch) 67 | 68 | if not os.path.isfile('setup.py'): 69 | error = "No setup.py -- are you running from a valid Python project?" 70 | raise NotFoundException(error) 71 | 72 | utils.build_and_deploy_egg(targetconf.project_id, targetconf.endpoint, 73 | targetconf.apikey) 74 | 75 | 76 | def _checkout(repo, git_branch=None, target_dir='egg-tmp-clone'): 77 | tmpdir = tempfile.mkdtemp(prefix='shub-deploy-egg-from-url') 78 | 79 | click.echo("Cloning the repository to a tmp folder...") 80 | os.chdir(tmpdir) 81 | 82 | vcs_commands = [ 83 | ['git', 'clone', repo, target_dir], 84 | ['hg', 'clone', repo, target_dir], 85 | ['bzr', 'branch', repo, target_dir], 86 | ] 87 | missing_exes = [] 88 | for cmd in vcs_commands: 89 | exe = which(cmd[0]) 90 | if not exe: 91 | missing_exes.append(cmd[0]) 92 | continue 93 | try: 94 | run_cmd([exe] + cmd[1:]) 95 | except SubcommandException: 96 | pass 97 | else: 98 | break 99 | else: 100 | if missing_exes: 101 | click.secho( 102 | "shub was unable to find the following VCS executables and " 103 | "could not try to check out your repository with these: %s" 104 | "" % ', '.join(missing_exes), fg='yellow') 105 | raise BadParameterException( 106 | "\nERROR: The provided repository URL is not valid: %s\n") 107 | 108 | os.chdir(target_dir) 109 | 110 | if git_branch: 111 | try: 112 | run_cmd([which('git'), 'checkout', git_branch]) 113 | except SubcommandException: 114 | raise BadParameterException("Branch %s is not valid" % git_branch) 115 | click.echo("%s branch was checked out" % git_branch) 116 | 117 | 118 | def _fetch_from_pypi(pkg): 119 | tmpdir = tempfile.mkdtemp(prefix='shub-deploy-egg-from-pypi') 120 | click.echo('Fetching %s from pypi' % pkg) 121 | download_from_pypi(tmpdir, pkg=pkg) 122 | click.echo('Package fetched successfully') 123 | os.chdir(tmpdir) 124 | -------------------------------------------------------------------------------- /docs/deploying.rst: -------------------------------------------------------------------------------- 1 | .. _deploying: 2 | 3 | =================================== 4 | Deploying projects and dependencies 5 | =================================== 6 | 7 | Deploying projects 8 | ------------------ 9 | 10 | To deploy a Scrapy project to Scrapy Cloud, navigate into the project's folder 11 | and run:: 12 | 13 | shub deploy [TARGET] 14 | 15 | where ``[TARGET]`` is either a project name defined in ``scrapinghub.yml`` or a 16 | numerical Scrapinghub project ID. 
If you have configured a default target in 17 | your ``scrapinghub.yml``, you can leave out the parameter completely:: 18 | 19 | $ shub deploy 20 | Packing version 3af023e-master 21 | Deploying to Scrapy Cloud project "12345" 22 | {"status": "ok", "project": 12345, "version": "3af023e-master", "spiders": 1} 23 | Run your spiders at: https://app.zyte.com/p/12345/ 24 | 25 | You can also deploy your project from a Python egg, or build one without 26 | deploying:: 27 | 28 | $ shub deploy --egg egg_name --version 1.0.0 29 | Using egg: egg_name 30 | Deploying to Scrapy Cloud project "12345" 31 | {"status": "ok", "project": 12345, "version": "1.0.0", "spiders": 1} 32 | Run your spiders at: https://app.zyte.com/p/12345/ 33 | 34 | :: 35 | 36 | $ shub deploy --build-egg egg_name 37 | Writing egg to egg_name 38 | 39 | 40 | .. _deploying-dependencies: 41 | 42 | Deploying dependencies 43 | ---------------------- 44 | 45 | Sometimes your project will depend on third party libraries that are not 46 | available on Scrapy Cloud. You can easily upload these by specifying a 47 | `requirements file`_:: 48 | 49 | # project_directory/scrapinghub.yml 50 | 51 | projects: 52 | default: 12345 53 | prod: 33333 54 | 55 | requirements: 56 | file: requirements.txt 57 | 58 | Note that this requirements file is an *extension* of the `Scrapy Cloud 59 | stack`_, and therefore should not contain packages that are already part of the 60 | stack, such as ``scrapy``. 61 | 62 | In case you use `pipenv`_ you may also specify a ``Pipfile``:: 63 | 64 | # project_directory/scrapinghub.yml 65 | 66 | projects: 67 | default: 12345 68 | prod: 33333 69 | 70 | requirements: 71 | file: Pipfile 72 | 73 | In this case the ``Pipfile`` must be locked and ``pipenv`` available in the 74 | environment. 75 | 76 | .. note:: 77 | 78 | To install pipenv tool, use ``pip install pipenv`` or check `its documentation 79 | `_. 80 | 81 | A requirements.txt file will be created out of the ``Pipfile`` so like the 82 | requirements file above, it should not contain packages that are already part 83 | of the stack. 84 | 85 | If you use `Poetry`_ you can specify your ``pyproject.toml``:: 86 | 87 | # project_directory/scrapinghub.yml 88 | 89 | projects: 90 | default: 12345 91 | prod: 33333 92 | 93 | requirements: 94 | file: pyproject.toml 95 | 96 | This will use Poetry's ``export`` command to create a requirements.txt file. For 97 | Poetry >= 2.0 this command is no longer installed by default and needs to manually 98 | added as described in the 99 | `plugin's documentation `_. 100 | If ``poetry.lock`` does not exist yet, it will be created during this process. 101 | 102 | .. note:: 103 | 104 | `Poetry`_ is a tool for dependency management and packaging in Python. 105 | 106 | When your dependencies cannot be specified in a requirements file, e.g. 107 | because they are not publicly available, you can supply them as Python eggs:: 108 | 109 | # project_directory/scrapinghub.yml 110 | 111 | projects: 112 | default: 12345 113 | prod: 33333 114 | 115 | requirements: 116 | file: requirements.txt 117 | eggs: 118 | - privatelib.egg 119 | - path/to/otherlib.egg 120 | 121 | Alternatively, if you cannot or don't want to supply Python eggs, you can also 122 | build your own Docker image to be used on Scrapy Cloud. See 123 | :ref:`deploy-custom-image`. 124 | 125 | .. _requirements file: https://pip.pypa.io/en/stable/user_guide/#requirements-files 126 | 127 | .. _pipenv: https://github.com/pypa/pipenv 128 | 129 | .. _Poetry: https://poetry.eustace.io/ 130 | 131 | .. 
_choose-custom-stack: 132 | 133 | Choosing a Scrapy Cloud stack 134 | ----------------------------- 135 | 136 | You can specify the `Scrapy Cloud stack`_ to deploy your spider to by adding a 137 | ``stack`` entry to your configuration:: 138 | 139 | # project_directory/scrapinghub.yml 140 | 141 | projects: 142 | default: 12345 143 | stack: scrapy:1.3-py3 144 | 145 | It is also possible to define the stack per project for advanced use cases:: 146 | 147 | # project_directory/scrapinghub.yml 148 | 149 | projects: 150 | default: 151 | id: 12345 152 | stack: scrapy:1.3-py3 153 | prod: 33333 # will use Scrapinghub's default stack 154 | 155 | .. _`Scrapy Cloud stack`: https://helpdesk.scrapinghub.com/support/solutions/articles/22000200402-scrapy-cloud-stacks 156 | -------------------------------------------------------------------------------- /tests/image/test_init.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from click.testing import CliRunner 5 | 6 | from shub.exceptions import BadConfigException 7 | from shub.image.init import cli 8 | from shub.image.init import _format_system_deps 9 | from shub.image.init import _format_system_env 10 | from shub.image.init import _format_requirements 11 | from shub.image.init import _wrap 12 | 13 | from .utils import add_fake_requirements 14 | 15 | 16 | @pytest.fixture 17 | def project_dir(project_dir): 18 | """Overriden project_dir fixture without Dockerfile""" 19 | os.remove(os.path.join(project_dir, 'Dockerfile')) 20 | return project_dir 21 | 22 | 23 | def test_cli_default_settings(project_dir): 24 | dockerfile_path = os.path.join(project_dir, 'Dockerfile') 25 | assert not os.path.exists(dockerfile_path) 26 | runner = CliRunner() 27 | result = runner.invoke(cli, []) 28 | assert result.exit_code == 0 29 | msg = f'Dockerfile is saved to {dockerfile_path}' 30 | assert msg in result.output 31 | assert os.path.exists(dockerfile_path) 32 | 33 | 34 | @pytest.mark.usefixtures('project_dir') 35 | def test_cli_list_recommended_reqs(): 36 | runner = CliRunner() 37 | result = runner.invoke(cli, ["--list-recommended-reqs"]) 38 | assert result.exit_code == 0 39 | assert "Recommended Python deps list:" in result.output 40 | 41 | 42 | def test_cli_abort_if_dockerfile_exists(project_dir): 43 | dockerfile_path = os.path.join(project_dir, 'Dockerfile') 44 | open(dockerfile_path, 'w').close() 45 | runner = CliRunner() 46 | result = runner.invoke(cli, [], input='yes\n') 47 | assert result.exit_code == 1 48 | assert 'Found a Dockerfile in the project directory, aborting' in result.output 49 | assert os.path.exists(os.path.join(project_dir, 'Dockerfile')) 50 | with open(dockerfile_path) as f: 51 | assert f.read() == '' 52 | 53 | 54 | def test_cli_create_setup_py(project_dir): 55 | setup_py_path = os.path.join(project_dir, 'setup.py') 56 | os.remove(setup_py_path) 57 | runner = CliRunner() 58 | result = runner.invoke(cli, [], input='yes\n') 59 | assert result.exit_code == 0 60 | assert os.path.isfile(setup_py_path) 61 | 62 | 63 | def test_wrap(): 64 | short_cmd = "run short command wrapping another one short" 65 | assert _wrap(short_cmd) == short_cmd 66 | assert _wrap(short_cmd + ' ' + short_cmd) == ( 67 | short_cmd + ' ' + ' '.join(short_cmd.split()[:3]) + 68 | " \\\n " + ' '.join(short_cmd.split()[3:])) 69 | 70 | 71 | def test_format_system_deps(): 72 | # no deps at all 73 | assert _format_system_deps('-', None) is None 74 | # base deps only 75 | assert _format_system_deps('a,b,cd', None) == ( 76 | 
"RUN apt-get update -qq && \\\n" 77 | " apt-get install -qy a b cd && \\\n" 78 | " rm -rf /var/lib/apt/lists/*") 79 | # base & additional deps only 80 | assert _format_system_deps('a,b,cd', 'ef,hk,b') == ( 81 | "RUN apt-get update -qq && \\\n" 82 | " apt-get install -qy a b cd ef hk && \\\n" 83 | " rm -rf /var/lib/apt/lists/*") 84 | # additional deps only 85 | assert _format_system_deps('-', 'ef,hk,b') == ( 86 | "RUN apt-get update -qq && \\\n" 87 | " apt-get install -qy b ef hk && \\\n" 88 | " rm -rf /var/lib/apt/lists/*") 89 | 90 | 91 | def test_format_system_env(): 92 | assert _format_system_env(None) == 'ENV TERM xterm' 93 | assert _format_system_env('test.settings') == ( 94 | "ENV TERM xterm\n" 95 | "ENV SCRAPY_SETTINGS_MODULE test.settings") 96 | 97 | 98 | def test_format_requirements(project_dir): 99 | add_fake_requirements(project_dir) 100 | basereqs = os.path.join(project_dir, 'requirements.txt') 101 | if os.path.exists(basereqs): 102 | os.remove(basereqs) 103 | # use given requirements 104 | assert _format_requirements( 105 | os.getcwd(), 'fake-requirements.txt') == ( 106 | "COPY ./fake-requirements.txt /app/requirements.txt\n" 107 | "RUN pip install --no-cache-dir -r requirements.txt") 108 | assert not os.path.exists(basereqs) 109 | # using base requirements 110 | assert _format_requirements( 111 | os.getcwd(), 'requirements.txt') == ( 112 | "COPY ./requirements.txt /app/requirements.txt\n" 113 | "RUN pip install --no-cache-dir -r requirements.txt") 114 | assert os.path.exists(basereqs) 115 | os.remove(basereqs) 116 | 117 | 118 | def test_no_scrapy_cfg(project_dir): 119 | os.remove(os.path.join(project_dir, 'scrapy.cfg')) 120 | runner = CliRunner() 121 | result = runner.invoke(cli, []) 122 | assert result.exit_code == BadConfigException.exit_code 123 | error_msg = ( 124 | 'Error: Cannot find Scrapy project settings. 
Please ensure that current ' 125 | 'directory contains scrapy.cfg with settings section, see example at ' 126 | 'https://doc.scrapy.org/en/latest/topics/commands.html#default-structure-of-scrapy-projects' 127 | ) 128 | assert error_msg in result.output 129 | assert not os.path.exists(os.path.join(project_dir, 'Dockerfile')) 130 | -------------------------------------------------------------------------------- /tests/test_schedule.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | 4 | from click.testing import CliRunner 5 | from scrapinghub import ScrapinghubAPIError 6 | 7 | from shub import schedule 8 | from shub.exceptions import RemoteErrorException 9 | 10 | from .utils import mock_conf 11 | 12 | 13 | class ScheduleTest(unittest.TestCase): 14 | 15 | def setUp(self): 16 | self.runner = CliRunner() 17 | self.conf = mock_conf(self) 18 | 19 | @mock.patch('shub.schedule.schedule_spider', autospec=True) 20 | def test_schedules_job_if_input_is_ok(self, mock_schedule): 21 | proj, endpoint, apikey = self.conf.get_target('default') 22 | # Default 23 | self.runner.invoke(schedule.cli, ['spider']) 24 | mock_schedule.assert_called_with( 25 | proj, endpoint, apikey, 'spider', (), (), 2, None, (), ()) 26 | # Other project 27 | self.runner.invoke(schedule.cli, ['123/spider']) 28 | mock_schedule.assert_called_with( 29 | 123, endpoint, apikey, 'spider', (), (), 2, None, (), ()) 30 | # Other endpoint 31 | proj, endpoint, apikey = self.conf.get_target('vagrant') 32 | self.runner.invoke(schedule.cli, ['vagrant/spider']) 33 | mock_schedule.assert_called_with( 34 | proj, endpoint, apikey, 'spider', (), (), 2, None, (), ()) 35 | # Other project at other endpoint 36 | self.runner.invoke(schedule.cli, ['vagrant/456/spider']) 37 | mock_schedule.assert_called_with( 38 | 456, endpoint, apikey, 'spider', (), (), 2, None, (), ()) 39 | 40 | @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) 41 | def test_schedule_invalid_spider(self, mock_client): 42 | mock_proj = mock_client.return_value.get_project.return_value 43 | mock_proj.jobs.run.side_effect = ScrapinghubAPIError('') 44 | with self.assertRaises(RemoteErrorException): 45 | schedule.schedule_spider(1, 'https://endpoint/api/', 46 | 'FAKE_API_KEY', 'fake_spider') 47 | 48 | @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) 49 | def test_schedule_spider_calls_project_jobs_run(self, mock_client): 50 | mock_proj = mock_client.return_value.get_project.return_value 51 | schedule.schedule_spider(1, 'https://endpoint/api/', 52 | 'FAKE_API_KEY', 'fake_spider') 53 | self.assertTrue(mock_proj.jobs.run) 54 | 55 | @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) 56 | def test_forwards_args_and_settings(self, mock_client): 57 | mock_proj = mock_client.return_value.get_project.return_value 58 | self.runner.invoke( 59 | schedule.cli, 60 | "testspider -s SETT=99 -a ARG=val1 --set SETTWITHEQUAL=10=10 " 61 | "--argument ARGWITHEQUAL=val2=val2".split(' '), 62 | ) 63 | job_args = mock_proj.jobs.run.call_args[1]['job_args'] 64 | self.assertLessEqual( 65 | {'ARG': 'val1', 'ARGWITHEQUAL': 'val2=val2'}.items(), 66 | job_args.items(), 67 | ) 68 | job_settings = mock_proj.jobs.run.call_args[1]['job_settings'] 69 | self.assertEqual( 70 | {'SETT': '99', 'SETTWITHEQUAL': '10=10'}, 71 | job_settings, 72 | ) 73 | 74 | @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) 75 | def test_forwards_tags(self, mock_client): 76 | mock_proj = 
mock_client.return_value.get_project.return_value 77 | self.runner.invoke(schedule.cli, 'testspider -t tag1 -t tag2 --tag tag3'.split()) 78 | call_kwargs = mock_proj.jobs.run.call_args[1] 79 | assert call_kwargs['add_tag'] == ('tag1', 'tag2', 'tag3') 80 | 81 | @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) 82 | def test_forwards_priority(self, mock_client): 83 | mock_proj = mock_client.return_value.get_project.return_value 84 | # short option name 85 | self.runner.invoke(schedule.cli, 'testspider -p 3'.split()) 86 | call_kwargs = mock_proj.jobs.run.call_args[1] 87 | assert call_kwargs['priority'] == 3 88 | # long option name 89 | self.runner.invoke(schedule.cli, 'testspider --priority 1'.split()) 90 | call_kwargs = mock_proj.jobs.run.call_args[1] 91 | assert call_kwargs['priority'] == 1 92 | 93 | @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) 94 | def test_forwards_units(self, mock_client): 95 | mock_proj = mock_client.return_value.get_project.return_value 96 | # no units specified 97 | self.runner.invoke(schedule.cli, 'testspider'.split()) 98 | call_kwargs = mock_proj.jobs.run.call_args[1] 99 | assert call_kwargs['units'] is None 100 | # short option name 101 | self.runner.invoke(schedule.cli, 'testspider -u 4'.split()) 102 | call_kwargs = mock_proj.jobs.run.call_args[1] 103 | assert call_kwargs['units'] == 4 104 | # long option name 105 | self.runner.invoke(schedule.cli, 'testspider --units 3'.split()) 106 | call_kwargs = mock_proj.jobs.run.call_args[1] 107 | assert call_kwargs['units'] == 3 108 | 109 | @mock.patch('shub.schedule.ScrapinghubClient', autospec=True) 110 | def test_forwards_environment(self, mock_client): 111 | mock_proj = mock_client.return_value.get_project.return_value 112 | self.runner.invoke( 113 | schedule.cli, 114 | "testspider -e VAR1=VAL1 --environment VAR2=VAL2".split(' '), 115 | ) 116 | call_kwargs = mock_proj.jobs.run.call_args[1] 117 | self.assertLessEqual( 118 | {'VAR1': 'VAL1', 'VAR2': 'VAL2'}.items(), 119 | call_kwargs['environment'].items(), 120 | ) 121 | -------------------------------------------------------------------------------- /shub/image/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import click 5 | 6 | from shub import exceptions as shub_exceptions 7 | from shub.config import load_shub_config, list_targets_callback 8 | from shub.image import utils 9 | from shub.image.test import test_cmd 10 | from shub.utils import create_scrapinghub_yml_wizard 11 | 12 | 13 | SHORT_HELP = 'Build release image.' 14 | 15 | HELP = """ 16 | Build command uses your Dockerfile to build an image and tag it properly. 17 | 18 | Internally, this command is a simple wrapper to `docker build` and uses 19 | docker daemon on your system to build an image. Also it can generate 20 | project version for you, and locate root project directory by itself. 21 | 22 | Image should be set via scrapinghub.yml, section "images". If version is not 23 | provided, the tool uses VCS-based stamp over project directory (the same as 24 | shub utils itself). 
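For example, assuming a "production" target is defined in your
scrapinghub.yml, you can build (and, unless -S/--skip-tests is passed, also
test) a versioned image with:

    shub image build production -V 1.0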
25 | """ 26 | 27 | BUILD_STEP_REGEX = re.compile(r'Step (\d+)/(\d+) :.*') 28 | BUILD_SUCCESS_REGEX = re.compile(r'Successfully built ([0-9a-f]+)') 29 | 30 | 31 | @click.command(help=HELP, short_help=SHORT_HELP) 32 | @click.argument("target", required=False, default="default") 33 | @click.option("-l", "--list-targets", is_flag=True, is_eager=True, 34 | expose_value=False, callback=list_targets_callback, 35 | help="List available project names defined in your config") 36 | @click.option("-d", "--debug", help="debug mode", is_flag=True, 37 | callback=utils.deprecate_debug_parameter) 38 | @click.option("-v", "--verbose", is_flag=True, 39 | help="stream build logs to console") 40 | @click.option("-V", "--version", help="release version") 41 | @click.option("-S", "--skip-tests", help="skip testing image", is_flag=True) 42 | @click.option("-n", "--no-cache", is_flag=True, 43 | help="Do not use cache when building the image") 44 | @click.option("-b", "--build-arg", multiple=True, 45 | help="Allow to pass build arguments to docker client.") 46 | @click.option("-f", "--file", "filename", default='Dockerfile', 47 | help="Name of the Dockerfile (Default is 'PATH/Dockerfile')") 48 | def cli(target, debug, verbose, version, skip_tests, no_cache, build_arg, filename): 49 | build_cmd(target, version, skip_tests, no_cache, build_arg, filename=filename) 50 | 51 | 52 | def build_cmd(target, version, skip_tests, no_cache, build_arg, filename='Dockerfile'): 53 | config = load_shub_config() 54 | create_scrapinghub_yml_wizard(config, target=target, image=True) 55 | client = utils.get_docker_client() 56 | project_dir = utils.get_project_dir() 57 | image = config.get_image(target) 58 | image_name = utils.format_image_name(image, version) 59 | build_args = dict(a.split('=', 1) for a in build_arg) 60 | if not os.path.exists(os.path.join(project_dir, filename)): 61 | raise shub_exceptions.NotFoundException( 62 | "Dockerfile is not found and it is required because project '{}' is configured " 63 | "to deploy Docker images. Please add a Dockerfile that will be used to build " 64 | "the image and retry this command. If you want to migrate an existing Scrapy project " 65 | "you can use `shub image init` command to create a Dockerfile.".format(target)) 66 | if utils.is_verbose(): 67 | build_progress_cls = _LoggedBuildProgress 68 | else: 69 | build_progress_cls = _BuildProgress 70 | click.echo(f"Building {image_name}.") 71 | events = client.build( 72 | path=project_dir, 73 | tag=image_name, 74 | decode=True, 75 | dockerfile=filename, 76 | nocache=no_cache, 77 | rm=True, 78 | buildargs=build_args, 79 | ) 80 | build_progress = build_progress_cls(events) 81 | build_progress.show() 82 | click.echo(f"The image {image_name} build is completed.") 83 | # Test the image content after building it 84 | if not skip_tests: 85 | test_cmd(target, version) 86 | 87 | 88 | class _LoggedBuildProgress(utils.BaseProgress): 89 | """Visualize build progress in verbose mode. 90 | 91 | Output all the events received from the docker daemon. 92 | """ 93 | def handle_event(self, event): 94 | super().handle_event(event) 95 | if 'stream' in event: 96 | self.handle_stream_event(event) 97 | 98 | def handle_stream_event(self, event): 99 | utils.debug_log("{}".format(event['stream'].rstrip())) 100 | 101 | 102 | class _BuildProgress(_LoggedBuildProgress): 103 | """Visualize build progress in non-verbose mode. 104 | 105 | Show total progress bar. 
106 | """ 107 | 108 | def __init__(self, events): 109 | super().__init__(events) 110 | self.bar = utils.create_progress_bar( 111 | total=1, 112 | desc='Steps', 113 | # don't need rate here, let's simplify the bar 114 | bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}', 115 | ) 116 | self.is_built = False 117 | 118 | def show(self): 119 | super().show() 120 | if self.bar: 121 | self.bar.close() 122 | if not self.is_built: 123 | raise shub_exceptions.RemoteErrorException( 124 | "Build image operation failed") 125 | 126 | def handle_stream_event(self, event): 127 | if BUILD_SUCCESS_REGEX.search(event['stream']): 128 | self.is_built = True 129 | return 130 | step_row = BUILD_STEP_REGEX.match(event['stream']) 131 | if not step_row: 132 | return 133 | step_id, total = (int(val) for val in step_row.groups()) 134 | self.bar.total = max(self.bar.total, total) 135 | # ignore onbuild sub-steps 136 | if step_id > self.bar.n and self.bar.total == total: 137 | self.bar.update() 138 | -------------------------------------------------------------------------------- /shub/image/test.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from shub import exceptions as shub_exceptions 4 | from shub.config import load_shub_config, list_targets_callback 5 | from shub.image import utils 6 | 7 | SHORT_HELP = "Test a built image with Scrapy Cloud contract" 8 | HELP = """ 9 | A command to test an image after build step to make sure it fits contract. 10 | 11 | It consists of the following steps: 12 | 13 | 1) check that image exists on local machine 14 | 2) check that image has start-crawl entrypoint 15 | 3) check that image has shub-image-info entrypoint 16 | 17 | If any of the checks fails - the test command fails as a whole. By default, 18 | the test command is also executed automatically as a part of build command 19 | in its end (if you do not provide -S/--skip-tests parameter explicitly). 20 | """ 21 | 22 | IMAGE_SIZE_LIMIT = 3 * 1024 * 1024 * 1024 # 3GB 23 | CONTRACT_CMD_NOT_FOUND_WARNING = ( 24 | 'Command %s is not found in the image. ' 25 | 'Please make sure you provided it according to Scrapy Cloud contract ' 26 | '(https://shub.readthedocs.io/en/stable/custom-images-contract.html) or ' 27 | 'added scrapinghub-entrypoint-scrapy>=0.8.0 to your requirements file ' 28 | 'if you use Scrapy.' 29 | ) 30 | LIST_SPIDERS_DEPRECATED_WARNING = ( 31 | 'list-spiders command is deprecated in favour of shub-image-info command: ' 32 | 'its format is described well in Scrapy Cloud contract ' 33 | '(https://shub.readthedocs.io/en/stable/custom-images-contract.html), ' 34 | 'please review and update your code.' 35 | ) 36 | IMAGE_TOO_LARGE_WARNING = ( 37 | 'Custom image for the project is too large (more than 3GB), it can lead ' 38 | 'to various performance issues when running it in Scrapy Cloud. ' 39 | 'Please reduce the image size or ask support team for help ' 40 | '(one of the recommended articles to start with is ' 41 | 'https://www.codacy.com/blog/five-ways-to-slim-your-docker-images/).' 
42 | ) 43 | 44 | 45 | @click.command(help=HELP, short_help=SHORT_HELP) 46 | @click.argument("target", required=False, default="default") 47 | @click.option("-l", "--list-targets", is_flag=True, is_eager=True, 48 | expose_value=False, callback=list_targets_callback, 49 | help="List available project names defined in your config") 50 | @click.option("-d", "--debug", help="debug mode", is_flag=True, 51 | callback=utils.deprecate_debug_parameter) 52 | @click.option("-v", "--verbose", is_flag=True, 53 | help="stream test logs to console") 54 | @click.option("-V", "--version", help="release version") 55 | def cli(target, debug, verbose, version): 56 | test_cmd(target, version) 57 | 58 | 59 | def test_cmd(target, version): 60 | config = load_shub_config() 61 | image = config.get_image(target) 62 | version = version or config.get_version() 63 | image_name = utils.format_image_name(image, version) 64 | docker_client = utils.get_docker_client() 65 | for check in [_check_image_size, 66 | _check_start_crawl_entry, 67 | _check_shub_image_info_entry]: 68 | check(image_name, docker_client) 69 | 70 | 71 | def _check_image_size(image_name, docker_client): 72 | """Check that the image exists on local machine and validate its size.""" 73 | # if there's no docker lib, the command will fail earlier 74 | # with an exception when getting a client in get_docker_client() 75 | from docker.errors import NotFound 76 | try: 77 | size = docker_client.inspect_image(image_name).get('Size') 78 | if size and isinstance(size, int) and size > IMAGE_SIZE_LIMIT: 79 | raise shub_exceptions.CustomImageTooLargeException( 80 | IMAGE_TOO_LARGE_WARNING) 81 | except NotFound as exc: 82 | utils.debug_log(exc) 83 | raise shub_exceptions.NotFoundException( 84 | "The image doesn't exist yet, please use build command at first.") 85 | 86 | 87 | def _check_shub_image_info_entry(image_name, docker_client): 88 | """Check that the image has shub-image-info entrypoint""" 89 | status, logs = _run_docker_command( 90 | docker_client, image_name, ['which', 'shub-image-info']) 91 | if status != 0 or not logs: 92 | _check_fallback_to_list_spiders(image_name, docker_client) 93 | 94 | 95 | def _check_fallback_to_list_spiders(image_name, docker_client): 96 | status, logs = _run_docker_command( 97 | docker_client, image_name, ['which', 'list-spiders']) 98 | if status != 0 or not logs: 99 | raise shub_exceptions.NotFoundException( 100 | CONTRACT_CMD_NOT_FOUND_WARNING % 'shub-image-info (& list-spiders)') 101 | else: 102 | click.echo(LIST_SPIDERS_DEPRECATED_WARNING) 103 | 104 | 105 | def _check_start_crawl_entry(image_name, docker_client): 106 | """Check that the image has start-crawl entrypoint""" 107 | status, logs = _run_docker_command( 108 | docker_client, image_name, ['which', 'start-crawl']) 109 | if status != 0 or not logs: 110 | raise shub_exceptions.NotFoundException( 111 | CONTRACT_CMD_NOT_FOUND_WARNING % 'start-crawl') 112 | 113 | 114 | def _run_docker_command(client, image_name, command): 115 | """A helper to execute an arbitrary cmd with given docker image""" 116 | container = client.create_container(image=image_name, command=command) 117 | try: 118 | client.start(container) 119 | statuscode = client.wait(container=container['Id'])['StatusCode'] 120 | logs = client.logs(container=container['Id'], stdout=True, 121 | stderr=True if statuscode else False, 122 | stream=False, timestamps=False) 123 | utils.debug_log(f"{command} results:\n{logs}") 124 | return statuscode, logs 125 | finally: 126 | client.remove_container(container) 127 | 
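# A quick way to exercise these checks by hand, as a sketch: it assumes a
# running local Docker daemon and a "dev" target in scrapinghub.yml (the
# target name is only an example):
#
#     shub image build dev --skip-tests   # build the image without the automatic test pass
#     shub image test dev --verbose       # then run the contract checks above explicitly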
-------------------------------------------------------------------------------- /shub/image/run/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import stat 3 | import shlex 4 | import signal 5 | import os.path 6 | from os import stat as os_stat 7 | from shutil import copyfile 8 | 9 | import click 10 | 11 | from shub.config import load_shub_config 12 | from shub.image import utils 13 | 14 | 15 | SHORT_HELP = 'Run custom image locally.' 16 | HELP = """ 17 | Run a custom Docker image locally. 18 | 19 | The command should be helpful to ensure that your custom image is properly 20 | written and do some preliminary local tests before pushing it to Scrapy Cloud. 21 | 22 | Most of the command parameters coincide with parameters for 'shub schedule' 23 | command to simplfy its usage. 24 | 25 | The `spider` argument should match the spider's name, e.g.: 26 | 27 | shub image run myspider 28 | 29 | A more advanced example of using non-default target with settings/arguments: 30 | 31 | shub image run production/myspider -a ARG1=VAL1 -s LOG_LEVEL=DEBUG 32 | """ 33 | 34 | SCRAPINGHUB_VOLUME = '/scrapinghub' 35 | WRAPPER_FILENAME = 'start-crawl-local' 36 | WRAPPER_LOCAL_PATH = os.path.join(os.path.dirname(__file__), 'wrapper.py') 37 | WRAPPER_IMAGE_PATH = os.path.join(SCRAPINGHUB_VOLUME, WRAPPER_FILENAME) 38 | 39 | 40 | @click.command(help=HELP, short_help=SHORT_HELP) 41 | @click.argument("spider", type=click.STRING) 42 | @click.option('-a', '--argument', 'args', 43 | help='Spider argument (-a name=value)', multiple=True) 44 | @click.option('-s', '--set', 'settings', 45 | help='Job-specific setting (-s name=value)', multiple=True) 46 | @click.option('-e', '--environment', multiple=True, 47 | help='Job environment variable (-e VAR=VAL)') 48 | @click.option("-V", "--version", help="use custom release version") 49 | @click.option("-v", "--verbose", is_flag=True, 50 | help="stream additional logs to console") 51 | @click.option("-k", "--keep-volume", help="Keep volume folder", is_flag=True) 52 | def cli(spider, args, settings, environment, version, verbose, keep_volume): 53 | run_cmd(spider, args, settings, environment, version, keep_volume) 54 | 55 | 56 | def run_cmd(spider, args, settings, environment, version, keep_volume): 57 | try: 58 | target, spider = spider.rsplit('/', 1) 59 | except ValueError: 60 | target = 'default' 61 | 62 | config = load_shub_config() 63 | image = config.get_image(target) 64 | version = version or config.get_version() 65 | image_name = utils.format_image_name(image, version) 66 | docker_client = utils.get_docker_client() 67 | 68 | env = _format_environment(spider, args, settings, environment) 69 | _run_with_docker(docker_client, image_name, env, keep_volume) 70 | 71 | 72 | def _format_environment(spider, args, settings, environment): 73 | """Convert all input crawl args to environment variables.""" 74 | # required defaults, can be overwritten with meta if needed 75 | job_data = {'spider': spider, 'key': '1/2/3', 'auth': ''} 76 | 77 | args = dict(x.split('=', 1) for x in args) 78 | cmd_args = shlex.split(args.pop('cmd_args', '')) 79 | if spider.startswith('py:'): 80 | job_data['job_cmd'] = [spider] + cmd_args 81 | else: 82 | job_data['spider_args'] = args 83 | meta = args.pop('meta', None) 84 | if meta: 85 | job_data.update(json.loads(meta)) 86 | 87 | job_environment = dict(x.split('=', 1) for x in environment) 88 | job_settings = dict(x.split('=', 1) for x in settings) 89 | return { 90 | 'SHUB_JOBKEY': 
job_data['key'], 91 | 'SHUB_SPIDER': spider, 92 | 'SHUB_JOB_DATA': _json_dumps(job_data), 93 | 'SHUB_JOB_ENV': _json_dumps(job_environment), 94 | 'SHUB_SETTINGS': _json_dumps({'job_settings': job_settings}), 95 | 'PYTHONUNBUFFERED': 1, 96 | } 97 | 98 | 99 | def _json_dumps(data): 100 | return json.dumps(data, sort_keys=True, separators=(',', ':')) 101 | 102 | 103 | def _run_with_docker(client, image_name, env, keep_volume=False): 104 | """Run a local docker container with the given custom image.""" 105 | 106 | def _signal_handler(sig, _): 107 | client.kill(container, sig) 108 | 109 | tmpdir_kw = {'prefix': 'shub-image-run-', 'cleanup': not keep_volume} 110 | with utils.make_temp_directory(**tmpdir_kw) as volume_dir: 111 | container = _create_container(client, image_name, env, volume_dir) 112 | try: 113 | client.start(container) 114 | signal.signal(signal.SIGINT, _signal_handler) 115 | signal.signal(signal.SIGTERM, _signal_handler) 116 | for log in client.logs(container, stream=True): 117 | click.echo(log.rstrip()) 118 | finally: 119 | client.remove_container(container, force=True) 120 | 121 | 122 | def _create_container(client, image_name, environment, volume_dir): 123 | """Create a docker container and customize its setup.""" 124 | # copy start-crawl wrapper to the volume temporary directory 125 | wrapper_cont_path = os.path.join(volume_dir, WRAPPER_FILENAME) 126 | copyfile(WRAPPER_LOCAL_PATH, wrapper_cont_path) 127 | wrapper_perms = os_stat(wrapper_cont_path).st_mode | stat.S_IEXEC 128 | os.chmod(wrapper_cont_path, wrapper_perms) # must be executable 129 | fifo_path = os.path.join(volume_dir, 'scrapinghub.fifo') 130 | environment['SHUB_FIFO_PATH'] = fifo_path 131 | # keep using default /scrapinghub volume but mount it as a temporary 132 | # directory in the host /tmp/ to have access to the files in needed 133 | binds = {volume_dir: {'bind': SCRAPINGHUB_VOLUME, 'mode': 'rw'}} 134 | host_config = client.create_host_config(binds=binds) 135 | return client.create_container( 136 | image=image_name, 137 | command=[WRAPPER_IMAGE_PATH], 138 | environment=environment, 139 | volumes=[volume_dir], 140 | host_config=host_config, 141 | ) 142 | -------------------------------------------------------------------------------- /tests/image/test_run.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import tempfile 3 | from unittest import mock 4 | 5 | try: 6 | from StringIO import StringIO 7 | except ImportError: 8 | from io import StringIO 9 | 10 | import pytest 11 | from click.testing import CliRunner 12 | 13 | from shub.image.run import cli, _json_dumps, WRAPPER_IMAGE_PATH 14 | from shub.image.run.wrapper import _consume_from_fifo, _millis_to_str 15 | from shub.image.utils import make_temp_directory 16 | 17 | 18 | def _format_job_data(spider='spider', auth='', **kwargs): 19 | data = {'key': '1/2/3', 'spider': spider, 'auth': auth} 20 | data.update(kwargs) 21 | return _json_dumps(data) 22 | 23 | 24 | @pytest.mark.usefixtures('project_dir') 25 | def test_cli(docker_client_mock): 26 | docker_client_mock.create_host_config.return_value = {'host': 'config'} 27 | docker_client_mock.create_container.return_value = 'contID' 28 | docker_client_mock.logs.return_value = ['some', 'logs'] 29 | # wrap make_temp_directory to validate its call args 30 | tmp_dir_fun = 'shub.image.utils.make_temp_directory' 31 | with mock.patch(tmp_dir_fun, wraps=make_temp_directory) as tmp_dir_mock: 32 | result = CliRunner().invoke(cli, ["dev/spider"]) 33 | assert 
result.exit_code == 0, result.stdout 34 | assert tmp_dir_mock.call_args[1] == { 35 | 'prefix': 'shub-image-run-', 'cleanup': True 36 | } 37 | docker_client_mock.start.assert_called_with('contID') 38 | docker_client_mock.logs.assert_called_with('contID', stream=True) 39 | docker_client_mock.remove_container.assert_called_with('contID', force=True) 40 | # validate create_container args 41 | docker_client_mock.create_container.assert_called_once() 42 | call_args = docker_client_mock.create_container.call_args[1] 43 | assert call_args['command'] == [WRAPPER_IMAGE_PATH] 44 | # validate environment 45 | call_env = call_args['environment'] 46 | fifo_path = call_env.pop('SHUB_FIFO_PATH') 47 | assert fifo_path.endswith('scrapinghub.fifo') 48 | job_data = _format_job_data(spider_args={}) 49 | expected_env = { 50 | 'SHUB_JOBKEY': '1/2/3', 51 | 'SHUB_SPIDER': 'spider', 52 | 'SHUB_JOB_DATA': job_data, 53 | 'SHUB_JOB_ENV': '{}', 54 | 'SHUB_SETTINGS': '{"job_settings":{}}', 55 | 'PYTHONUNBUFFERED': 1, 56 | } 57 | assert call_env == expected_env 58 | # validate other configuration parts 59 | assert call_args['host_config'] == {'host': 'config'} 60 | assert call_args['image'] == 'registry.io/user/project:1.0' 61 | assert call_args['volumes'] == [os.path.dirname(fifo_path)] 62 | 63 | 64 | @pytest.mark.usefixtures('project_dir') 65 | def test_cli_with_args(docker_client_mock): 66 | docker_client_mock.logs.return_value = [] 67 | result = CliRunner().invoke(cli, ( 68 | 'dev/spider -a arg0= -a arg1=val1 --argument arg2=val2 ' 69 | '-s SET1=VAL1 --set SET2=VAL2 ' 70 | '-e ENV1=ENVVAL1 --environment ENV2=ENVVAL2 ' 71 | '-a meta={"auth":"custom"}'.split(' ') 72 | )) 73 | assert result.exit_code == 0, result.stdout 74 | call_args = docker_client_mock.create_container.call_args[1] 75 | call_env = call_args['environment'] 76 | expected_settings = {"job_settings": {"SET1": "VAL1", "SET2": "VAL2"}} 77 | assert call_env['SHUB_SETTINGS'] == _json_dumps(expected_settings) 78 | expected_env = {"ENV1": "ENVVAL1", "ENV2": "ENVVAL2"} 79 | assert call_env['SHUB_JOB_ENV'] == _json_dumps(expected_env) 80 | expected_jobdata = {"arg0": "", "arg1": "val1", "arg2": "val2"} 81 | assert call_env['SHUB_JOB_DATA'] == _format_job_data( 82 | spider_args=expected_jobdata, auth='custom' 83 | ) 84 | 85 | 86 | @pytest.mark.usefixtures('project_dir') 87 | def test_cli_with_version(docker_client_mock): 88 | docker_client_mock.logs.return_value = [] 89 | result = CliRunner().invoke(cli, ['dev/spider', '-V', 'custom']) 90 | assert result.exit_code == 0, result.stdout 91 | call_args = docker_client_mock.create_container.call_args[1] 92 | assert call_args['image'] == 'registry.io/user/project:custom' 93 | 94 | 95 | @pytest.mark.usefixtures('project_dir') 96 | def test_cli_with_script(docker_client_mock): 97 | docker_client_mock.logs.return_value = [] 98 | script_args = "--flag1 --flag2=0 val1 val2" 99 | result = CliRunner().invoke(cli, [ 100 | 'dev/py:testargs.py', '-a', 'cmd_args="%s"' % script_args 101 | ]) 102 | assert result.exit_code == 0, result.stdout 103 | call_args = docker_client_mock.create_container.call_args[1] 104 | call_env = call_args['environment'] 105 | assert call_env['SHUB_JOB_DATA'] == _format_job_data( 106 | spider='py:testargs.py', 107 | job_cmd=["py:testargs.py", script_args], 108 | ) 109 | 110 | 111 | # Separate section for wrapper tests. 
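# FIFO_TEST_DATA below mimics what a crawl process writes to the named pipe
# set up by `shub image run`: one message per line, a short type prefix such
# as "LOG" or "ITM" followed by a JSON payload. As the assertions show, the
# wrapper prints LOG entries to stdout as "<local time> <level name> <message>"
# and ignores item ("ITM") entries.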
112 | 113 | FIFO_TEST_TS = 1485269941065 114 | FIFO_TEST_DATA = """\ 115 | LOG {"time": %(ts)d, "level": 20, "message": "Some message"} 116 | ITM {"key": "value", "should-be": "ignored"} 117 | LOG {"time": %(ts)d, "level": 30, "message": "Other message"}\ 118 | """ % {'ts': FIFO_TEST_TS} 119 | 120 | 121 | @mock.patch('sys.stdout', new_callable=StringIO) 122 | def test_consume_from_fifo(mock_stdout): 123 | try: 124 | # XXX work-around to use NamedTemporaryFile on Windows 125 | # https://github.com/appveyor/ci/issues/2547 126 | with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp: 127 | filename = temp.name 128 | temp.write(FIFO_TEST_DATA) 129 | temp.seek(0) 130 | _consume_from_fifo(filename) 131 | finally: 132 | os.remove(filename) 133 | local_datetime_string = _millis_to_str(FIFO_TEST_TS) 134 | assert mock_stdout.getvalue() == ( 135 | '{date} INFO Some message\n' 136 | '{date} WARNING Other message\n'.format(date=local_datetime_string) 137 | ) 138 | -------------------------------------------------------------------------------- /tests/image/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tempfile 4 | from unittest import mock, TestCase 5 | 6 | import pytest 7 | 8 | from shub.exceptions import BadConfigException, BadParameterException, NotFoundException 9 | from shub.image.utils import ( 10 | get_credentials, 11 | get_docker_client, 12 | get_image_registry, 13 | get_project_dir, 14 | format_image_name, 15 | load_status_url, 16 | store_status_url, 17 | STATUS_FILE_LOCATION, 18 | ) 19 | 20 | from .utils import FakeProjectDirectory, add_sh_fake_config 21 | 22 | 23 | class ReleaseUtilsTest(TestCase): 24 | 25 | def test_get_project_dir(self): 26 | self.assertRaises(BadConfigException, get_project_dir) 27 | with FakeProjectDirectory() as tmpdir: 28 | add_sh_fake_config(tmpdir) 29 | assert get_project_dir() == tmpdir 30 | 31 | def test_get_docker_client(self): 32 | mocked_docker = mock.Mock() 33 | sys.modules['docker'] = mocked_docker 34 | client_mock = mock.Mock() 35 | 36 | class DockerClientMock: 37 | 38 | def __init__(self, *args, **kwargs): 39 | client_mock(*args, **kwargs) 40 | 41 | def version(self): 42 | return {} 43 | 44 | mocked_docker.APIClient = DockerClientMock 45 | assert get_docker_client() 46 | client_mock.assert_called_with(base_url=None, tls=None, version='auto') 47 | # set basic test environment 48 | os.environ['DOCKER_HOST'] = 'http://127.0.0.1' 49 | os.environ['DOCKER_API_VERSION'] = '1.40' 50 | assert get_docker_client() 51 | client_mock.assert_called_with( 52 | base_url='http://127.0.0.1', tls=None, version='1.40') 53 | # test for tls 54 | os.environ['DOCKER_TLS_VERIFY'] = '1' 55 | os.environ['DOCKER_CERT_PATH'] = 'some-path' 56 | mocked_tls = mock.Mock() 57 | mocked_docker.tls.TLSConfig.return_value = mocked_tls 58 | assert get_docker_client() 59 | client_mock.assert_called_with( 60 | base_url='http://127.0.0.1', 61 | tls=mocked_tls, 62 | version='1.40') 63 | mocked_docker.tls.TLSConfig.assert_called_with( 64 | client_cert=(os.path.join('some-path', 'cert.pem'), 65 | os.path.join('some-path', 'key.pem')), 66 | verify=os.path.join('some-path', 'ca.pem'), 67 | assert_hostname=False) 68 | 69 | def test_format_image_name(self): 70 | assert format_image_name('simple', 'tag') == 'simple:tag' 71 | assert format_image_name('user/simple', 'tag') == 'user/simple:tag' 72 | assert format_image_name('registry/user/simple', 'tag') == \ 73 | 'registry/user/simple:tag' 74 | assert 
format_image_name('registry:port/user/simple', 'tag') == \ 75 | 'registry:port/user/simple:tag' 76 | assert format_image_name('registry:port/user/simple:test', 'tag') == \ 77 | 'registry:port/user/simple:tag' 78 | with mock.patch('shub.config.load_shub_config') as mocked: 79 | config = mock.Mock() 80 | config.get_version.return_value = 'test-version' 81 | mocked.return_value = config 82 | assert format_image_name('test', None) == 'test:test-version' 83 | 84 | def test_get_credentials(self): 85 | assert get_credentials(insecure=True) == (None, None) 86 | with pytest.raises(BadParameterException): 87 | get_credentials(username='user', insecure=True) 88 | with pytest.raises(BadParameterException): 89 | get_credentials(password='pass', insecure=True) 90 | assert get_credentials(apikey='apikey') == ('apikey', ' ') 91 | assert get_credentials( 92 | username='user', password='pass') == ('user', 'pass') 93 | with pytest.raises(BadParameterException): 94 | get_credentials(username='user') 95 | with pytest.raises(BadParameterException): 96 | get_credentials(password='pass') 97 | assert get_credentials(target_apikey='tapikey') == ('tapikey', ' ') 98 | 99 | def test_get_image_registry(self): 100 | assert get_image_registry('ubuntu:12.04') is None 101 | assert get_image_registry('someuser/image:tagA') is None 102 | assert get_image_registry('registry.io/imageA') == 'registry.io' 103 | assert get_image_registry('registry.io/user/name:tag') == 'registry.io' 104 | assert get_image_registry('registry:8012/image') == 'registry:8012' 105 | assert get_image_registry('registry:8012/user/repo') == 'registry:8012' 106 | 107 | 108 | class StatusUrlsTest(TestCase): 109 | 110 | def setUp(self): 111 | self.curdir = os.getcwd() 112 | self.tmp_dir = tempfile.gettempdir() 113 | os.chdir(self.tmp_dir) 114 | self.status_file = os.path.join(self.tmp_dir, STATUS_FILE_LOCATION) 115 | if os.path.exists(self.status_file): 116 | os.remove(self.status_file) 117 | 118 | def tearDown(self): 119 | os.chdir(self.curdir) 120 | 121 | def test_load_status_url(self): 122 | self.assertRaises(NotFoundException, load_status_url, 0) 123 | # try with void file 124 | open(self.status_file, 'a').close() 125 | self.assertRaises(BadConfigException, load_status_url, 0) 126 | # try with data 127 | with open(self.status_file, 'w') as f: 128 | f.write('1: http://link1\n2: https://link2\n') 129 | self.assertRaises(NotFoundException, load_status_url, 0) 130 | assert load_status_url(1) == 'http://link1' 131 | assert load_status_url(2) == 'https://link2' 132 | 133 | def test_store_status_url(self): 134 | assert not os.path.exists(self.status_file) 135 | # create and add first entry 136 | store_status_url('http://test0', 2) 137 | assert os.path.exists(self.status_file) 138 | with open(self.status_file) as f: 139 | assert f.read() == '0: http://test0\n' 140 | # add another one 141 | store_status_url('http://test1', 2) 142 | with open(self.status_file) as f: 143 | assert f.read() == '0: http://test0\n1: http://test1\n' 144 | # replacement 145 | assert store_status_url('http://test2', 2) == 2 146 | with open(self.status_file) as f: 147 | assert f.read() == '1: http://test1\n2: http://test2\n' 148 | # existing 149 | assert store_status_url('http://test1', 2) == 1 150 | with open(self.status_file) as f: 151 | assert f.read() == '1: http://test1\n2: http://test2\n' 152 | -------------------------------------------------------------------------------- /tests/test_migrate_eggs.py: -------------------------------------------------------------------------------- 
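# The tests below drive shub.migrate_eggs.main() against the fixture archives
# in tests/samples/ (migrate-eggs.zip and migrate-eggs-no-eggs.zip serve as
# mocked download responses) and assert on what ends up in ./eggs,
# requirements.txt and scrapinghub.yml afterwards.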
1 | import os 2 | import unittest 3 | from unittest import mock 4 | 5 | import yaml 6 | from click.testing import CliRunner 7 | from yaml import CLoader as Loader 8 | 9 | from shub.migrate_eggs import main 10 | from shub.config import Target 11 | 12 | 13 | class MigrateEggsTest(unittest.TestCase): 14 | REQ_LIST = [ 15 | 'boto==2.38.0', 16 | 'dateparser==0.3.3', 17 | 'decorator==4.0.10', 18 | 'dicttoxml==1.6.6', 19 | 'httpretty==0.8.0', 20 | 'hubstorage==0.16.1', 21 | 'jdatetime==1.7.2', 22 | 'six==1.9.0', 'spur==0.3.15', 23 | 'SQLAlchemy==1.0.5', 24 | 'sqlitedict==1.3.0', 25 | 'urllib3==1.11', 26 | 'wheel==0.24.0', 27 | 'wsgiref==0.1.2', 28 | ] 29 | 30 | def run(self, *a, **kw): 31 | runner = CliRunner() 32 | with runner.isolated_filesystem(): 33 | super().run(*a, **kw) 34 | 35 | def setUp(self): 36 | self.clickm = mock.patch('shub.migrate_eggs.click').start() 37 | gtc = mock.patch('shub.migrate_eggs.get_target_conf').start() 38 | self.requestsm = mock.patch('shub.migrate_eggs.requests').start() 39 | 40 | self.curr_dir = os.path.dirname(os.path.realpath(__file__)) 41 | 42 | with open('./scrapinghub.yml', 'w') as f: 43 | f.write('') 44 | 45 | gtc.return_value = Target( 46 | project_id=123, 47 | endpoint='endpoint1', 48 | apikey='apikey1', 49 | stack='', 50 | image='', 51 | requirements_file='', 52 | version='', 53 | eggs=[], 54 | ) 55 | 56 | self.addCleanup(mock.patch.stopall) 57 | 58 | def walksorted(self): 59 | return [ 60 | (sorted(dirs), sorted(files)) 61 | for _, dirs, files in os.walk('.') 62 | ] 63 | 64 | def _assert_requirements_content(self): 65 | with open('./requirements.txt') as f: 66 | content = f.read() 67 | self.assertIn('DISABLE_DASH_EGGS', content) 68 | requirements = [line for line in content.split('\n') if '==' in line] 69 | self.assertListEqual(requirements, self.REQ_LIST) 70 | 71 | def test_full(self): 72 | migrate_zip = os.path.join(self.curr_dir, 'samples/migrate-eggs.zip') 73 | with open(migrate_zip, 'rb') as f: 74 | self.requestsm.get().content = f.read() 75 | 76 | main('default') 77 | self.clickm.confirm.assert_called_with( 78 | 'Eggs will be stored in ./eggs, are you sure ? 
' 79 | ) 80 | 81 | files = self.walksorted() 82 | 83 | self.assertEqual( 84 | files[0], 85 | (['eggs'], ['requirements.txt', 'scrapinghub.yml']), 86 | ) 87 | self.assertEqual( 88 | files[1], 89 | ([], ['1.egg', '2.egg', '3.egg']) 90 | ) 91 | 92 | with open('./scrapinghub.yml') as f: 93 | abc = yaml.load(f, Loader=Loader) 94 | eggs = abc['requirements'].pop('eggs') 95 | eggs = [e.replace('\\', '/') for e in eggs] 96 | self.assertEqual( 97 | eggs, 98 | [ 99 | './eggs/1.egg', 100 | './eggs/2.egg', 101 | './eggs/3.egg', 102 | ], 103 | ) 104 | self.assertDictEqual( 105 | abc, 106 | { 107 | 'requirements': { 108 | 'file': './requirements.txt' 109 | }, 110 | } 111 | ) 112 | 113 | self._assert_requirements_content() 114 | 115 | for i in range(1, 4): 116 | i = str(i) 117 | with open('./eggs/%s.egg' % i) as f: 118 | self.assertEqual(f.read().strip(), i) 119 | 120 | def test_no_eggs(self): 121 | file_ = 'samples/migrate-eggs-no-eggs.zip' 122 | migrate_zip = os.path.join(self.curr_dir, file_) 123 | with open(migrate_zip, 'rb') as f: 124 | self.requestsm.get().content = f.read() 125 | 126 | main('default') 127 | self.assertFalse(self.clickm.confirm.called) 128 | 129 | files = self.walksorted() 130 | self.assertListEqual( 131 | files, 132 | [([], ['requirements.txt', 'scrapinghub.yml'])] 133 | ) 134 | 135 | with open('./scrapinghub.yml') as f: 136 | abc = yaml.load(f, Loader=Loader) 137 | self.assertDictEqual( 138 | abc, 139 | { 140 | 'requirements': { 141 | 'file': './requirements.txt' 142 | }, 143 | } 144 | ) 145 | 146 | self._assert_requirements_content() 147 | 148 | def test_override_reqs_file(self): 149 | file_ = 'samples/migrate-eggs-no-eggs.zip' 150 | migrate_zip = os.path.join(self.curr_dir, file_) 151 | with open(migrate_zip, 'rb') as f: 152 | self.requestsm.get().content = f.read() 153 | with open('./requirements.txt', 'w') as f: 154 | f.write('smth==1.2.3') 155 | 156 | self.clickm.confirm.return_value = False 157 | main('default') 158 | self.clickm.confirm.assert_called_with( 159 | 'requirements.txt already exists, are you sure to override it ?' 160 | ) 161 | 162 | files = self.walksorted() 163 | self.assertListEqual( 164 | files, 165 | [([], ['requirements.txt', 'scrapinghub.yml'])] 166 | ) 167 | 168 | with open('./scrapinghub.yml') as f: 169 | self.assertEqual(f.read(), '') 170 | 171 | with open('./requirements.txt') as f: 172 | content = f.read() 173 | self.assertEqual(content, 'smth==1.2.3') 174 | 175 | self.clickm.reset_mock() 176 | self.clickm.confirm.return_value = True 177 | main('default') 178 | 179 | self.clickm.confirm.assert_called_with( 180 | 'requirements.txt already exists, are you sure to override it ?' 
181 | ) 182 | 183 | files = self.walksorted() 184 | self.assertListEqual( 185 | files, 186 | [([], ['requirements.txt', 'scrapinghub.yml'])] 187 | ) 188 | 189 | with open('./scrapinghub.yml') as f: 190 | abc = yaml.load(f, Loader=Loader) 191 | self.assertDictEqual( 192 | abc, 193 | { 194 | 'requirements': { 195 | 'file': './requirements.txt' 196 | }, 197 | } 198 | ) 199 | 200 | self._assert_requirements_content() 201 | -------------------------------------------------------------------------------- /shub/image/list.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import click 4 | import docker 5 | import requests 6 | from urllib.parse import urljoin 7 | 8 | from shub.exceptions import ShubException 9 | from shub.config import load_shub_config, list_targets_callback 10 | from shub.image import utils 11 | 12 | 13 | SETTING_TYPES = ['project_settings', 14 | 'organization_settings', 15 | 'enabled_addons'] 16 | 17 | SHORT_HELP = 'List spiders.' 18 | 19 | HELP = """ 20 | List command tries to run your image locally and get a spiders list. 21 | 22 | Internally, this command is a simple wrapper to `docker run` and uses 23 | docker daemon on your system to run a new container using your image. 24 | Before creating the container, there's a Dash call to get your project 25 | settings to get your spiders list properly (respecting SPIDERS_MODULE 26 | setting, etc). 27 | 28 | Image should be set via scrapinghub.yml, section "images". If version is not 29 | provided, the tool uses VCS-based stamp over project directory (the same as 30 | shub utils itself). 31 | """ 32 | 33 | 34 | @click.command(help=HELP, short_help=SHORT_HELP) 35 | @click.argument("target", required=False, default="default") 36 | @click.option("-l", "--list-targets", is_flag=True, is_eager=True, 37 | expose_value=False, callback=list_targets_callback, 38 | help="List available project names defined in your config") 39 | @click.option("-d", "--debug", help="debug mode", is_flag=True, 40 | callback=utils.deprecate_debug_parameter) 41 | @click.option("-v", "--verbose", is_flag=True, help="stream logs to console") 42 | @click.option("-s", "--silent", is_flag=True, 43 | help="don't warn if Dash project is not defined in config") 44 | @click.option("-V", "--version", help="release version") 45 | def cli(target, debug, verbose, silent, version): 46 | list_cmd_full(target, silent, version) 47 | 48 | 49 | def list_cmd_full(target, silent, version): 50 | config = load_shub_config() 51 | image = config.get_image(target) 52 | version = version or config.get_version() 53 | image_name = utils.format_image_name(image, version) 54 | target_conf = config.get_target_conf(target) 55 | metadata = list_cmd(image_name, 56 | target_conf.project_id, 57 | target_conf.endpoint, 58 | target_conf.apikey) 59 | for spider in metadata.get('spiders', []): 60 | click.echo(spider) 61 | 62 | 63 | def list_cmd(image_name, project, endpoint, apikey): 64 | """Short version of list cmd to use with deploy cmd.""" 65 | settings = _get_project_settings(project, endpoint, apikey) 66 | environment = {'JOB_SETTINGS': json.dumps(settings)} 67 | exit_code, logs = _run_cmd_in_docker_container( 68 | image_name, 'shub-image-info', environment) 69 | if exit_code == 0: 70 | return _extract_metadata_from_image_info_output(logs) 71 | # shub-image-info command not found, fallback to list-spiders 72 | elif exit_code == 127: 73 | # FIXME we should pass some value for SCRAPY_PROJECT_ID anyway 74 | # to handle `scrapy list` cmd 
properly via sh_scrapy entrypoint 75 | # environment['SCRAPY_PROJECT_ID'] = str(project) if project else '' 76 | exit_code, logs = _run_cmd_in_docker_container( 77 | image_name, 'list-spiders', environment) 78 | if exit_code != 0: 79 | click.echo(logs) 80 | raise ShubException('Container with list cmd exited with code %s' % exit_code) 81 | return { 82 | 'project_type': 'scrapy', 83 | 'spiders': utils.valid_spiders(logs.splitlines()), 84 | } 85 | else: 86 | click.echo(logs) 87 | raise ShubException( 88 | 'Container with shub-image-info cmd exited with code %s' % exit_code) 89 | 90 | 91 | def _get_project_settings(project, endpoint, apikey): 92 | utils.debug_log(f'Getting settings for {project} project:') 93 | req = requests.get( 94 | urljoin(endpoint, '/api/settings/get.json'), 95 | params={'project': project}, 96 | auth=(apikey, ''), 97 | timeout=300, 98 | allow_redirects=False 99 | ) 100 | req.raise_for_status() 101 | utils.debug_log(f"Response: {req.json()}") 102 | return {k: v for k, v in req.json().items() if k in SETTING_TYPES} 103 | 104 | 105 | def _run_cmd_in_docker_container(image_name, command, environment): 106 | """Run a command inside the image container.""" 107 | client = utils.get_docker_client() 108 | container = client.create_container( 109 | image=image_name, 110 | command=[command], 111 | environment=environment, 112 | ) 113 | if 'Id' not in container: 114 | raise ShubException("Create container error:\n %s" % container) 115 | try: 116 | client.start(container) 117 | except docker.errors.APIError as e: 118 | explanation = utils.ensure_unicode(e.explanation or '') 119 | if 'executable file not found' in explanation: 120 | # docker.errors.APIError: 500 Server Error: 121 | # Internal Server Error ("Cannot start container xxx: 122 | # [8] System error: exec: "shub-image-info": 123 | # executable file not found in $PATH") 124 | return 127, None 125 | raise 126 | statuscode = client.wait(container=container['Id'])['StatusCode'] 127 | logs = client.logs( 128 | container=container['Id'], stream=False, timestamps=False, 129 | stdout=True, stderr=True if statuscode else False, 130 | ) 131 | return statuscode, utils.ensure_unicode(logs) 132 | 133 | 134 | def _extract_metadata_from_image_info_output(output): 135 | """Extract and validate spiders list from `shub-image-info` output.""" 136 | 137 | def raise_shub_image_info_error(error): 138 | """Helper to raise ShubException with prefix and output""" 139 | msg = f"shub-image-info: {error} \n[output '{output}']" 140 | raise ShubException(msg) 141 | 142 | try: 143 | metadata = json.loads(output) 144 | project_type = metadata.get('project_type') 145 | except (AttributeError, ValueError): 146 | raise_shub_image_info_error('output is not a valid JSON dict') 147 | if not isinstance(project_type, str): 148 | raise_shub_image_info_error('"project_type" key is required and must be a string') 149 | 150 | spiders_list = metadata.get('spiders') 151 | if not isinstance(spiders_list, list): 152 | raise_shub_image_info_error('"spiders" key is required and must be a list') 153 | spiders, scripts = [], [] 154 | for name in spiders_list: 155 | if not (name and isinstance(name, str)): 156 | raise_shub_image_info_error("spider name can't be empty or non-string") 157 | if project_type == 'scrapy' and name.startswith('py:'): 158 | scripts.append(name[3:]) 159 | else: 160 | spiders.append(name) 161 | return { 162 | 'project_type': project_type, 163 | 'spiders': utils.valid_spiders(spiders), 164 | 'scripts': utils.valid_spiders(scripts), 165 | } 166 | 
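A rough sketch of the metadata contract that `_extract_metadata_from_image_info_output` above enforces (the payload and spider names are hypothetical, not part of the project):

    import json

    from shub.image.list import _extract_metadata_from_image_info_output

    # Hypothetical output of the `shub-image-info` command run inside the image.
    payload = json.dumps({
        "project_type": "scrapy",
        "spiders": ["toscrape", "py:report.py"],
    })

    metadata = _extract_metadata_from_image_info_output(payload)
    # metadata carries 'project_type', 'spiders' and 'scripts' keys; for a
    # scrapy project the "py:"-prefixed entries are moved under 'scripts'.
    print(metadata)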
-------------------------------------------------------------------------------- /shub/image/init.py: -------------------------------------------------------------------------------- 1 | import os 2 | import textwrap 3 | from string import Template 4 | 5 | import click 6 | 7 | from shub import exceptions as shub_exceptions 8 | from shub import utils as shub_utils 9 | 10 | 11 | DOCKER_APP_DIR = '/app' 12 | DOCKERFILE_TEMPLATE = """\ 13 | FROM $base_image 14 | $system_deps 15 | $system_env 16 | RUN mkdir -p {docker_app_dir} 17 | WORKDIR {docker_app_dir} 18 | $requirements 19 | COPY . {docker_app_dir} 20 | RUN python -m pip install . 21 | """.format(docker_app_dir=DOCKER_APP_DIR) 22 | 23 | DEFAULT_BASE_IMAGE = "scrapinghub/scrapinghub-stack-scrapy:2.13-20250721" 24 | RECOMMENDED_PYTHON_DEPS = [ 25 | 'guppy==0.1.10', 26 | ] 27 | 28 | SHORT_HELP = "Create Dockerfile for existing Scrapy project." 29 | 30 | HELP = """ 31 | Init command creates a Dockerfile for existing Scrapy project. This tool is for users 32 | who want to create a custom Docker image and don't have a Dockerfile yet. If generated 33 | Dockerfile doesn't fit your project feel free to edit it. 34 | 35 | Python packages 36 | 37 | If there's a requirements.txt file in the project directory - it will be added to the 38 | Dockerfile. Also it's possible to provide a path to requirements file via --requirements 39 | option. Otherwise new requirements.txt file will be created in the project directory 40 | with the recommended Python packages. Use --list-recommended-reqs to list them. 41 | 42 | It's recommended to include scrapinghub-entrypoint-scrapy package - it is a 43 | support layer that passes data from the job to Scrapinghub storage. Otherwise 44 | you will need to send data to Scrapinghub storage using HTTP API. 45 | 46 | System packages 47 | 48 | You can extend list of system packages installed in the image via --add-deps option. 49 | """ 50 | 51 | 52 | def list_recommended_python_reqs(ctx, param, value): 53 | """List recommended Python requirements""" 54 | if not value: 55 | return 56 | click.echo("Recommended Python deps list:") 57 | for dep in RECOMMENDED_PYTHON_DEPS: 58 | click.echo(f'- {dep}') 59 | ctx.exit() 60 | 61 | 62 | def _deprecate_base_deps_parameter(ctx, param, value): 63 | if value: 64 | click.echo("WARNING: --base-deps parameter is deprecated. 
" 65 | "Please use --add-deps parameter instead.", 66 | err=True) 67 | return value 68 | 69 | 70 | @click.command(help=HELP, short_help=SHORT_HELP) 71 | @click.option("--list-recommended-reqs", is_flag=True, is_eager=True, 72 | expose_value=False, callback=list_recommended_python_reqs, 73 | help="list recommended python requirements") 74 | @click.option("--project", default="default", 75 | help="project name to get settings module from scrapy.cfg") 76 | @click.option("--base-image", default=DEFAULT_BASE_IMAGE, 77 | help="base docker image name") 78 | @click.option("--base-deps", default='', 79 | help="[DEPRECATED] a comma-separated list with base system dependencies", 80 | callback=_deprecate_base_deps_parameter) 81 | @click.option("--add-deps", 82 | help="a comma-separated list with additional system dependencies") 83 | @click.option("--requirements", default="requirements.txt", 84 | help="path to requirements.txt") 85 | def cli(project, base_image, base_deps, add_deps, requirements): 86 | closest_scrapy_cfg = shub_utils.closest_file('scrapy.cfg') 87 | scrapy_config = shub_utils.get_config() 88 | if not closest_scrapy_cfg or not scrapy_config.has_option('settings', project): 89 | raise shub_exceptions.BadConfigException( 90 | 'Cannot find Scrapy project settings. Please ensure that current directory ' 91 | 'contains scrapy.cfg with settings section, see example at ' 92 | 'https://doc.scrapy.org/en/latest/topics/commands.html#default-structure-of-scrapy-projects') # NOQA 93 | project_dir = os.path.dirname(closest_scrapy_cfg) 94 | dockefile_path = os.path.join(project_dir, 'Dockerfile') 95 | if os.path.exists(dockefile_path): 96 | raise shub_exceptions.ShubException('Found a Dockerfile in the project directory, aborting') 97 | settings_module = scrapy_config.get('settings', 'default') 98 | shub_utils.create_default_setup_py(settings=settings_module) 99 | values = { 100 | 'base_image': base_image, 101 | 'system_deps': _format_system_deps(base_deps, add_deps), 102 | 'system_env': _format_system_env(settings_module), 103 | 'requirements': _format_requirements(project_dir, requirements), 104 | } 105 | values = {key: value if value else '' for key, value in values.items()} 106 | source = Template(DOCKERFILE_TEMPLATE) 107 | results = source.substitute(values) 108 | results = results.replace('\n\n', '\n') 109 | with open(dockefile_path, 'w') as dockerfile: 110 | dockerfile.write(results) 111 | click.echo(f"Dockerfile is saved to {dockefile_path}") 112 | 113 | 114 | def _format_system_deps(base_deps, add_deps): 115 | """Prepare a list with system dependencies install cmds""" 116 | system_deps = base_deps.split(',') if base_deps != '-' else [] 117 | if add_deps: 118 | system_add_deps = add_deps.split(',') 119 | system_deps = list(set(system_deps + system_add_deps)) 120 | system_deps = sorted(filter(None, system_deps)) 121 | if not system_deps: 122 | return 123 | commands = ["apt-get update -qq", 124 | "apt-get install -qy {}".format(' '.join(system_deps)), 125 | "rm -rf /var/lib/apt/lists/*"] 126 | return 'RUN ' + ' && \\\n '.join( 127 | [_wrap(cmd) for cmd in commands]) 128 | 129 | 130 | def _wrap(text): 131 | """Wrap dependencies with separator""" 132 | lines = textwrap.wrap(text, subsequent_indent=' ', 133 | break_long_words=False, 134 | break_on_hyphens=False) 135 | return ' \\\n'.join(lines) 136 | 137 | 138 | def _format_system_env(settings_module): 139 | rows = ['ENV TERM xterm'] 140 | if settings_module: 141 | rows.append('ENV SCRAPY_SETTINGS_MODULE %s' % settings_module) 142 | return 
'\n'.join(rows) 143 | 144 | 145 | def _format_requirements(project_dir, requirements): 146 | """Prepare cmds for project requirements""" 147 | rel_reqs_path = os.path.relpath( 148 | os.path.join(project_dir, requirements), project_dir) 149 | if os.path.isfile(rel_reqs_path): 150 | if rel_reqs_path.startswith('../'): 151 | raise shub_exceptions.BadParameterException( 152 | "Requirements file must be inside your project directory, " 153 | "otherwise it will not be included in the Docker build context.") 154 | else: 155 | # let's create requirements.txt with base dependencies 156 | with open(rel_reqs_path, 'w') as reqs_file: 157 | reqs_file.writelines("%s\n" % line for line in RECOMMENDED_PYTHON_DEPS) 158 | click.echo('Created base requirements.txt in project dir.') 159 | rows = [ 160 | f'COPY ./{rel_reqs_path} {DOCKER_APP_DIR}/requirements.txt', 161 | 'RUN pip install --no-cache-dir -r requirements.txt', 162 | ] 163 | return '\n'.join(rows) 164 | -------------------------------------------------------------------------------- /tests/image/test_build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from unittest import mock 4 | 5 | import pytest 6 | from click.testing import CliRunner 7 | 8 | from shub import exceptions as shub_exceptions 9 | from shub.image.build import cli 10 | 11 | from ..utils import clean_progress_output, format_expected_progress 12 | 13 | 14 | @pytest.fixture 15 | def test_mock(): 16 | """Mock for shub image test command""" 17 | with mock.patch('shub.image.build.test_cmd') as m: 18 | yield m 19 | 20 | 21 | def test_cli(docker_client_mock, project_dir, test_mock): 22 | docker_client_mock.build.return_value = [ 23 | {"stream": "all is ok"}, 24 | {"stream": "Successfully built 12345"} 25 | ] 26 | runner = CliRunner() 27 | result = runner.invoke(cli, ["dev", "-v"]) 28 | assert result.exit_code == 0 29 | docker_client_mock.build.assert_called_with( 30 | decode=True, 31 | path=project_dir, 32 | tag='registry.io/user/project:1.0', 33 | dockerfile='Dockerfile', 34 | nocache=False, 35 | rm=True, 36 | buildargs={} 37 | ) 38 | test_mock.assert_called_with("dev", None) 39 | 40 | 41 | def test_cli_with_nocache(docker_client_mock, project_dir, test_mock): 42 | docker_client_mock.build.return_value = [ 43 | {"stream": "all is ok"}, 44 | {"stream": "Successfully built 12345"} 45 | ] 46 | runner = CliRunner() 47 | result = runner.invoke(cli, ["dev", "-v", "--no-cache"]) 48 | assert result.exit_code == 0 49 | docker_client_mock.build.assert_called_with( 50 | decode=True, 51 | path=project_dir, 52 | tag='registry.io/user/project:1.0', 53 | dockerfile='Dockerfile', 54 | nocache=True, 55 | rm=True, 56 | buildargs={} 57 | ) 58 | test_mock.assert_called_with("dev", None) 59 | 60 | 61 | def test_cli_with_buildargs(docker_client_mock, project_dir, test_mock): 62 | docker_client_mock.build.return_value = [ 63 | {"stream": "all is ok"}, 64 | {"stream": "Successfully built 12345"} 65 | ] 66 | runner = CliRunner() 67 | result = runner.invoke(cli, ["dev", "-v", "-b", "AWS_KEY=asdasdeg", "-b", 68 | "AWS_SEC=ashthku", "-b", "PARAM=query=4"]) 69 | assert result.exit_code == 0 70 | docker_client_mock.build.assert_called_with( 71 | decode=True, 72 | path=project_dir, 73 | tag='registry.io/user/project:1.0', 74 | dockerfile='Dockerfile', 75 | nocache=False, 76 | rm=True, 77 | buildargs={'AWS_KEY': 'asdasdeg', 'AWS_SEC': 'ashthku', 'PARAM': 'query=4'} 78 | ) 79 | test_mock.assert_called_with("dev", None) 80 | 81 | 82 | def 
test_cli_with_progress(docker_client_mock, project_dir, test_mock): 83 | docker_client_mock.build.return_value = [ 84 | {"stream": "Step 1/3 : FROM some_image"}, 85 | {"stream": "some internal actions"}, 86 | {"stream": "Step 2/3 : RUN cmd1"}, 87 | {"stream": "some other actions"}, 88 | {"stream": "Step 3/3 : RUN cmd2"}, 89 | {"stream": "Successfully built 12345"} 90 | ] 91 | runner = CliRunner() 92 | result = runner.invoke(cli, ["dev"]) 93 | assert result.exit_code == 0 94 | expected = format_expected_progress( 95 | r'Building registry\.io/user/project:1\.0\.' 96 | r'Steps: 0%\| +\| 0/1' 97 | r'Steps: 100%\|█+\| 3/3' 98 | r'The image registry\.io/user/project:1\.0 build is completed\.' 99 | ) 100 | assert re.search(clean_progress_output(result.output), expected) 101 | 102 | 103 | def test_cli_custom_version(docker_client_mock, project_dir, test_mock): 104 | docker_client_mock.build.return_value = [ 105 | {"stream": "all is ok"}, 106 | {"stream": "Successfully built 12345"} 107 | ] 108 | runner = CliRunner() 109 | result = runner.invoke(cli, ["dev", "--version", "test"]) 110 | assert result.exit_code == 0 111 | docker_client_mock.build.assert_called_with( 112 | decode=True, 113 | path=project_dir, 114 | tag='registry.io/user/project:test', 115 | dockerfile='Dockerfile', 116 | nocache=False, 117 | rm=True, 118 | buildargs={} 119 | ) 120 | test_mock.assert_called_with("dev", "test") 121 | 122 | 123 | def test_cli_no_dockerfile(docker_client_mock, project_dir): 124 | docker_client_mock.build.return_value = [ 125 | {"error": "Minor", "errorDetail": "Testing output"}, 126 | {"stream": "Successfully built 12345"} 127 | ] 128 | os.remove(os.path.join(project_dir, 'Dockerfile')) 129 | runner = CliRunner() 130 | result = runner.invoke(cli, ["dev"]) 131 | assert result.exit_code == shub_exceptions.NotFoundException.exit_code 132 | 133 | 134 | @pytest.mark.usefixtures('project_dir') 135 | def test_cli_fail(docker_client_mock): 136 | docker_client_mock.build.return_value = [ 137 | {"error": "Minor", "errorDetail": "Test"} 138 | ] 139 | runner = CliRunner() 140 | result = runner.invoke(cli, ["dev"]) 141 | assert result.exit_code == shub_exceptions.RemoteErrorException.exit_code 142 | 143 | 144 | @pytest.mark.parametrize('skip_tests_flag', ['-S', '--skip-tests']) 145 | def test_cli_skip_tests(docker_client_mock, test_mock, project_dir, skip_tests_flag): 146 | docker_client_mock.build.return_value = [ 147 | {"stream": "all is ok"}, 148 | {"stream": "Successfully built 12345"} 149 | ] 150 | runner = CliRunner() 151 | result = runner.invoke(cli, ["dev", skip_tests_flag]) 152 | assert result.exit_code == 0 153 | docker_client_mock.build.assert_called_with( 154 | decode=True, 155 | path=project_dir, 156 | tag='registry.io/user/project:1.0', 157 | dockerfile='Dockerfile', 158 | nocache=False, 159 | rm=True, 160 | buildargs={} 161 | ) 162 | assert test_mock.call_count == 0 163 | 164 | 165 | @pytest.mark.parametrize('file_param', ['-f', '--file']) 166 | def test_cli_custom_dockerfile(docker_client_mock, project_dir, test_mock, file_param): 167 | docker_client_mock.build.return_value = [ 168 | {"stream": "all is ok"}, 169 | {"stream": "Successfully built 12345"} 170 | ] 171 | runner = CliRunner() 172 | result = runner.invoke(cli, ["dev", file_param, "Dockerfile"]) 173 | assert result.exit_code == 0 174 | docker_client_mock.build.assert_called_with( 175 | decode=True, 176 | path=project_dir, 177 | tag='registry.io/user/project:1.0', 178 | dockerfile='Dockerfile', 179 | nocache=False, 180 | rm=True, 181 | 
buildargs={} 182 | ) 183 | test_mock.assert_called_with("dev", None) 184 | 185 | 186 | @pytest.mark.usefixtures('project_dir') 187 | @pytest.mark.parametrize('file_param', ['-f', '--file']) 188 | def test_cli_missing_custom_dockerfile(docker_client_mock, file_param): 189 | docker_client_mock.build.return_value = [ 190 | {"error": "Minor", "errorDetail": "Testing output"}, 191 | {"stream": "Successfully built 12345"} 192 | ] 193 | runner = CliRunner() 194 | result = runner.invoke(cli, ["dev", file_param, "Dockerfile-missing"]) 195 | assert result.exit_code == shub_exceptions.NotFoundException.exit_code 196 | -------------------------------------------------------------------------------- /tests/image/test_list.py: -------------------------------------------------------------------------------- 1 | import json 2 | from unittest import mock 3 | 4 | import docker 5 | import pytest 6 | from click.testing import CliRunner 7 | 8 | from shub.exceptions import BadParameterException, ShubException 9 | from shub.image.list import cli, list_cmd 10 | from shub.image.list import _run_cmd_in_docker_container 11 | from shub.image.list import _extract_metadata_from_image_info_output 12 | 13 | 14 | def _mock_docker_client(wait_code=0, logs=None): 15 | client_mock = mock.Mock() 16 | client_mock.create_container.return_value = {'Id': '1234'} 17 | client_mock.wait.return_value = {'Error': None, 'StatusCode': wait_code} 18 | client_mock.logs.return_value = logs or '' 19 | return client_mock 20 | 21 | 22 | def _get_settings_mock(settings=None): 23 | settings_mock = mock.Mock() 24 | settings_mock.json.return_value = settings or {} 25 | return settings_mock 26 | 27 | 28 | def _convert_str(data, to_binary=False): 29 | """Helper to convert str to corresponding string or binary type. 30 | 31 | `data` has `str` type (in both Python 2/3), the function converts it 32 | to corresponding string or binary representation depending on Python 33 | version and boolean `to_binary` parameter. 
34 | """ 35 | if to_binary: 36 | return data.encode('utf-8') 37 | return data 38 | 39 | 40 | def test_cli_no_scrapinghub_config(): 41 | result = CliRunner().invoke(cli, ["dev", "-v", "--version", "test"]) 42 | assert result.exit_code == BadParameterException.exit_code 43 | assert 'Could not find target "dev"' in result.output 44 | 45 | 46 | @pytest.mark.usefixtures('project_dir') 47 | @pytest.mark.parametrize('is_binary_logs', [True, False]) 48 | @mock.patch('shub.image.utils.get_docker_client') 49 | @mock.patch('requests.get') 50 | def test_cli(requests_get_mock, get_docker_client_mock, is_binary_logs): 51 | """Case when shub-image-info succeeded.""" 52 | requests_get_mock.return_value = _get_settings_mock() 53 | mocked_logs = json.dumps({'project_type': 'scrapy', 54 | 'spiders': ['abc', 'def']}) 55 | mocked_logs = _convert_str(mocked_logs, to_binary=is_binary_logs) 56 | docker_client = _mock_docker_client(logs=mocked_logs) 57 | get_docker_client_mock.return_value = docker_client 58 | result = CliRunner().invoke(cli, ["dev", "-v", "-s", "--version", "test"]) 59 | assert result.exit_code == 0 60 | assert result.output.endswith('abc\ndef\n') 61 | requests_get_mock.assert_called_with( 62 | 'https://app.zyte.com/api/settings/get.json', 63 | allow_redirects=False, auth=('abcdef', ''), 64 | params={'project': 12345}, timeout=300) 65 | 66 | 67 | @pytest.mark.usefixtures('project_dir') 68 | @mock.patch('shub.image.utils.get_docker_client') 69 | @mock.patch('requests.get') 70 | def test_cli_image_info_error(requests_get_mock, get_docker_client_mock): 71 | """Case when shub-image-info command failed with unknown exit code.""" 72 | requests_get_mock.return_value = _get_settings_mock() 73 | docker_client = _mock_docker_client(wait_code=1, logs='some-error') 74 | get_docker_client_mock.return_value = docker_client 75 | result = CliRunner().invoke(cli, ["dev", "-v", "--version", "test"]) 76 | assert result.exit_code == 1 77 | assert 'Container with shub-image-info cmd exited with code 1' in result.output 78 | 79 | 80 | @pytest.mark.usefixtures('project_dir') 81 | @mock.patch('shub.image.utils.get_docker_client') 82 | @mock.patch('requests.get') 83 | def test_cli_image_info_not_found(requests_get_mock, get_docker_client_mock): 84 | """Case when shub-image-info cmd not found with fallback to list-spiders.""" 85 | requests_get_mock.return_value = _get_settings_mock({'SETTING': 'VALUE'}) 86 | docker_client = _mock_docker_client() 87 | docker_client.wait.side_effect = [ 88 | {'Error': None, 'StatusCode': 127}, 89 | {'Error': None, 'StatusCode': 0} 90 | ] 91 | docker_client.logs.side_effect = ["not-found", "spider1\nspider2\n"] 92 | get_docker_client_mock.return_value = docker_client 93 | result = CliRunner().invoke(cli, ["dev", "-v", "--version", "test"]) 94 | assert result.exit_code == 0 95 | assert 'spider1\nspider2' in result.output 96 | 97 | 98 | @pytest.mark.usefixtures('project_dir') 99 | @mock.patch('shub.image.utils.get_docker_client') 100 | @mock.patch('requests.get') 101 | def test_cli_both_commands_failed(requests_get_mock, get_docker_client_mock): 102 | """Case when shub-image-info cmd not found with fallback to list-spiders.""" 103 | requests_get_mock.return_value = _get_settings_mock({'SETTING': 'VALUE'}) 104 | docker_client = _mock_docker_client(wait_code=127, logs='not-found') 105 | get_docker_client_mock.return_value = docker_client 106 | result = CliRunner().invoke(cli, ["dev", "-v", "--version", "test"]) 107 | assert result.exit_code == 1 108 | assert 'Container with list cmd exited with 
code 127' in result.output 109 | 110 | 111 | @mock.patch('shub.image.utils.get_docker_client') 112 | def test_run_cmd_in_docker_container(get_docker_client_mock): 113 | docker_client = _mock_docker_client(logs='abc\ndef\ndsd') 114 | get_docker_client_mock.return_value = docker_client 115 | test_env = {'TEST_ENV1': 'VAL1', 'TEST_ENV2': 'VAL2'} 116 | result = _run_cmd_in_docker_container('image', 'test-cmd', test_env) 117 | assert result[0] == 0 118 | assert result[1] == 'abc\ndef\ndsd' 119 | docker_client.create_container.assert_called_with( 120 | command=['test-cmd'], environment=test_env, image='image') 121 | docker_client.start.assert_called_with({'Id': '1234'}) 122 | docker_client.wait.assert_called_with(container="1234") 123 | docker_client.logs.assert_called_with( 124 | container='1234', stderr=False, stdout=True, 125 | stream=False, timestamps=False) 126 | 127 | 128 | @pytest.mark.parametrize('is_binary_explanation', [True, False]) 129 | @mock.patch('shub.image.list._get_project_settings', return_value={}) 130 | @mock.patch('shub.image.utils.get_docker_client') 131 | def test_shub_image_info_fallback(get_docker_client_mock, _, 132 | is_binary_explanation): 133 | error_msg = ('Cannot start container xxx: [8] System error: exec:' 134 | ' "shub-image-info": executable file not found in $PATH') 135 | error_msg = _convert_str(error_msg, to_binary=is_binary_explanation) 136 | exception = docker.errors.APIError(mock.Mock(), mock.Mock(), 137 | explanation=error_msg) 138 | get_docker_client_mock().create_container.return_value = {'Id': 'id'} 139 | get_docker_client_mock().start.side_effect = [ 140 | exception, 141 | None, 142 | ] 143 | get_docker_client_mock().wait.return_value = {'Error': None, 'StatusCode': 0} 144 | get_docker_client_mock().logs.return_value = 'abc\ndef\n' 145 | result = list_cmd('image_name', 111, 'endpoint', 'apikey') 146 | assert get_docker_client_mock().start.call_count == 2 147 | assert result == {'spiders': ['abc', 'def'], 'project_type': 'scrapy'} 148 | 149 | 150 | @pytest.mark.parametrize('output,error_msg', [ 151 | ('bad-json', 'output is not a valid JSON dict'), 152 | (['data'], 'output is not a valid JSON dict'), 153 | ({'spiders': []}, '"project_type" key is required and must be a string'), 154 | ({'project_type': 1}, '"project_type" key is required and must be a string'), 155 | ({'project_type': 'scrapy'}, '"spiders" key is required and must be a list'), 156 | ({'project_type': 'scrapy', 'spiders': 'bad-data'}, '"spiders" key is required and must be a list'), 157 | ({'project_type': 'scrapy', 'spiders': ['']}, "spider name can't be empty or non-string"), 158 | ({'project_type': 'scrapy', 'spiders': [123]}, "spider name can't be empty or non-string"), 159 | ]) 160 | def test_extract_metadata_from_image_info_output_failures(output, error_msg): 161 | with pytest.raises(ShubException) as exc: 162 | _extract_metadata_from_image_info_output(json.dumps(output)) 163 | assert error_msg in exc.value.message 164 | -------------------------------------------------------------------------------- /shub/image/push.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import click 4 | 5 | from shub import exceptions as shub_exceptions 6 | from shub.config import load_shub_config, list_targets_callback 7 | from shub.image import utils 8 | from shub.image.test import test_cmd 9 | from shub.image.utils import get_image_registry 10 | 11 | SHORT_HELP = 'Push an image to a specified docker registry' 12 | 
13 | HELP = """ 14 | A command to push your image to the specified Docker registry. 15 | 16 | The command is a simple wrapper around the `docker push` command and uses the 17 | Docker daemon on your system to push the image. The only differences are that it 18 | can generate the correct image version and provides easy registry login logic. 19 | 20 | The optional params are mostly related to registry authorization. 21 | By default, the tool tries to call the registry in an insecure manner; 22 | otherwise you have to enter your credentials (at least username/password). 23 | """ 24 | 25 | LOGIN_ERROR_MSG = 'Please authorize with docker login' 26 | 27 | 28 | @click.command(help=HELP, short_help=SHORT_HELP) 29 | @click.argument("target", required=False, default="default") 30 | @click.option("-l", "--list-targets", is_flag=True, is_eager=True, 31 | expose_value=False, callback=list_targets_callback, 32 | help="List available project names defined in your config") 33 | @click.option("-d", "--debug", help="debug mode", is_flag=True, 34 | callback=utils.deprecate_debug_parameter) 35 | @click.option("-v", "--verbose", is_flag=True, 36 | help="stream push logs to console") 37 | @click.option("-V", "--version", help="release version") 38 | @click.option("--username", help="docker registry name") 39 | @click.option("--password", help="docker registry password") 40 | @click.option("--email", help="docker registry email") 41 | @click.option("--apikey", help="SH apikey to use built-in registry") 42 | @click.option("--insecure", is_flag=True, help="use insecure registry") 43 | @click.option("-S", "--skip-tests", help="skip testing image", is_flag=True) 44 | @click.option("-R", "--reauth", is_flag=True, 45 | help="re-authenticate to registry") 46 | def cli(target, debug, verbose, version, username, password, email, apikey, 47 | insecure, skip_tests, reauth): 48 | push_cmd(target, version, username, password, email, apikey, insecure, 49 | skip_tests, reauth) 50 | 51 | 52 | def push_cmd(target, version, username, password, email, apikey, insecure, 53 | skip_tests, reauth): 54 | # Test the image content after building it 55 | if not skip_tests: 56 | test_cmd(target, version) 57 | 58 | client = utils.get_docker_client() 59 | config = load_shub_config() 60 | image = config.get_image(target) 61 | username, password = utils.get_credentials( 62 | username=username, password=password, insecure=insecure, 63 | apikey=apikey, target_apikey=config.get_apikey(target)) 64 | 65 | if username: 66 | _execute_push_login( 67 | client, image, username, password, email, reauth) 68 | image_name = utils.format_image_name(image, version) 69 | click.echo(f"Pushing {image_name} to the registry.") 70 | events = client.push(image_name, stream=True, decode=True) 71 | if utils.is_verbose(): 72 | push_progress_cls = _LoggedPushProgress 73 | else: 74 | push_progress_cls = _PushProgress 75 | push_progress = push_progress_cls(events) 76 | push_progress.show() 77 | click.echo(f"The image {image_name} pushed successfully.") 78 | 79 | 80 | def _execute_push_login(client, image, username, password, email, reauth): 81 | """Log in if credentials are provided for the registry""" 82 | registry = get_image_registry(image) 83 | resp = client.login(username=username, password=password, 84 | email=email, registry=registry, reauth=reauth) 85 | if not (isinstance(resp, dict) and 'username' in resp or 86 | ('Status' in resp and resp['Status'] == 'Login Succeeded')): 87 | raise shub_exceptions.RemoteErrorException( 88 | "Docker registry login error.") 89 |
click.echo(f"Login to {registry} succeeded.") 90 | 91 | 92 | class _LoggedPushProgress(utils.BaseProgress): 93 | """Visualize push progress in verbose mode. 94 | 95 | Output all the events received from the docker daemon. 96 | """ 97 | def handle_event(self, event): 98 | if 'error' in event and LOGIN_ERROR_MSG in event['error']: 99 | click.echo( 100 | "Something went wrong when trying to authenticate to Docker " 101 | "registry when pushing the image. Please ensure your " 102 | "credentials are correct and try again with --reauth flag.") 103 | raise shub_exceptions.RemoteErrorException( 104 | "Docker registry authentication error") 105 | super().handle_event(event) 106 | if 'status' in event: 107 | self.handle_status_event(event) 108 | 109 | def handle_status_event(self, event): 110 | msg = "Logs:{} {}".format(event['status'], event.get('progress')) 111 | utils.debug_log(msg) 112 | 113 | 114 | class _PushProgress(_LoggedPushProgress): 115 | """Visualize push progress in non-verbose mode. 116 | 117 | Show total progress bar and separate bar for each pushed layer. 118 | """ 119 | 120 | def __init__(self, push_events): 121 | super().__init__(push_events) 122 | # Total bar repesents total progress in terms of amount of layers. 123 | self.total_bar = self._create_total_bar() 124 | self.layers = set() 125 | # XXX: has to be OrderedDict to make tqdm.write/click.echo work as expected. 126 | # Otherwise it writes at random position, usually in the middle of the progress bars. 127 | self.layers_bars = OrderedDict() 128 | 129 | def handle_status_event(self, event): 130 | layer_id = event.get('id') 131 | status = event.get('status') 132 | progress = event.get('progressDetail') 133 | # `preparing` events are correlated with amount of layers to push 134 | if status in ('Preparing', 'Waiting'): 135 | self._add_layer(layer_id) 136 | # the events are final and used to update total bar once per layer 137 | elif status in ('Layer already exists', 'Pushed'): 138 | self._add_layer(layer_id) 139 | self.total_bar.update() 140 | # `pushing` events represents actual push process per layer 141 | elif event.get('status') == 'Pushing' and progress: 142 | progress_current = progress.get('current', 0) 143 | progress_total = max(progress.get('total', 0), progress_current) 144 | if layer_id not in self.layers_bars: 145 | if not progress_total: 146 | return 147 | # create a progress bar per pushed layer 148 | self.layers_bars[layer_id] = self._create_bar_per_layer( 149 | layer_id, progress_total, progress_current) 150 | bar = self.layers_bars[layer_id] 151 | bar.total = max(bar.total, progress_total) 152 | bar.update(max(progress_current - bar.n, 0)) 153 | 154 | def _add_layer(self, layer_id): 155 | self.layers.add(layer_id) 156 | self.total_bar.total = max(self.total_bar.total, len(self.layers)) 157 | self.total_bar.refresh() 158 | 159 | def show(self): 160 | super().show() 161 | self.total_bar.close() 162 | for bar in self.layers_bars.values(): 163 | bar.close() 164 | 165 | def _create_total_bar(self): 166 | return utils.create_progress_bar( 167 | total=1, 168 | desc='Layers', 169 | # don't need rate here, let's simplify the bar 170 | bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}' 171 | ) 172 | 173 | def _create_bar_per_layer(self, layer_id, total, initial): 174 | return utils.create_progress_bar( 175 | desc=layer_id, 176 | total=total, 177 | initial=initial, 178 | unit='B', 179 | unit_scale=True, 180 | # don't need estimates here, keep only rate 181 | bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{rate_fmt}]', 182 | ) 
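A minimal sketch of how push credentials are resolved before the registry login, based on the `get_credentials` behaviour exercised by the image utils tests above (the literal key values are placeholders):

    from shub.image.utils import get_credentials

    # --insecure skips registry authentication entirely.
    assert get_credentials(insecure=True) == (None, None)
    # An API key (explicit --apikey, or the target's apikey as a fallback) is
    # sent as the username with a single space as the password.
    assert get_credentials(apikey='SOMEAPIKEY') == ('SOMEAPIKEY', ' ')
    assert get_credentials(target_apikey='TARGETKEY') == ('TARGETKEY', ' ')
    # Username and password must always be provided together.
    assert get_credentials(username='user', password='pass') == ('user', 'pass')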
183 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | livehtml: 60 | sphinx-autobuild -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 61 | 62 | dirhtml: 63 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 64 | @echo 65 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 66 | 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
71 | 72 | pickle: 73 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 74 | @echo 75 | @echo "Build finished; now you can process the pickle files." 76 | 77 | json: 78 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 79 | @echo 80 | @echo "Build finished; now you can process the JSON files." 81 | 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | qthelp: 89 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 90 | @echo 91 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 92 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 93 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/shub.image.qhcp" 94 | @echo "To view the help file:" 95 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/shub.image.qhc" 96 | 97 | applehelp: 98 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 99 | @echo 100 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 101 | @echo "N.B. You won't be able to view it unless you put it in" \ 102 | "~/Library/Documentation/Help or install it in your application" \ 103 | "bundle." 104 | 105 | devhelp: 106 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 107 | @echo 108 | @echo "Build finished." 109 | @echo "To view the help file:" 110 | @echo "# mkdir -p $$HOME/.local/share/devhelp/shub.image" 111 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/shub.image" 112 | @echo "# devhelp" 113 | 114 | epub: 115 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 116 | @echo 117 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 118 | 119 | latex: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo 122 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 123 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 124 | "(use \`make latexpdf' here to do that automatically)." 125 | 126 | latexpdf: 127 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 128 | @echo "Running LaTeX files through pdflatex..." 129 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 130 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 131 | 132 | latexpdfja: 133 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 134 | @echo "Running LaTeX files through platex and dvipdfmx..." 135 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 136 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 137 | 138 | text: 139 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 140 | @echo 141 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 142 | 143 | man: 144 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 145 | @echo 146 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 147 | 148 | texinfo: 149 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 150 | @echo 151 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 152 | @echo "Run \`make' in that directory to run these through makeinfo" \ 153 | "(use \`make info' here to do that automatically)." 154 | 155 | info: 156 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 157 | @echo "Running Texinfo files through makeinfo..." 158 | make -C $(BUILDDIR)/texinfo info 159 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 
160 | 161 | gettext: 162 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 163 | @echo 164 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 165 | 166 | changes: 167 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 168 | @echo 169 | @echo "The overview file is in $(BUILDDIR)/changes." 170 | 171 | linkcheck: 172 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 173 | @echo 174 | @echo "Link check complete; look for any errors in the above output " \ 175 | "or in $(BUILDDIR)/linkcheck/output.txt." 176 | 177 | doctest: 178 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 179 | @echo "Testing of doctests in the sources finished, look at the " \ 180 | "results in $(BUILDDIR)/doctest/output.txt." 181 | 182 | coverage: 183 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 184 | @echo "Testing of coverage in the sources finished, look at the " \ 185 | "results in $(BUILDDIR)/coverage/python.txt." 186 | 187 | xml: 188 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 189 | @echo 190 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 191 | 192 | pseudoxml: 193 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 194 | @echo 195 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 196 | --------------------------------------------------------------------------------