├── deployer
├── __init__.py
├── exceptions.py
├── constants.py
├── utils.py
├── main.py
└── upload.py
├── .gitignore
├── setup.cfg
├── pypi-release.sh
├── .therapist.yml
├── .env-dist
├── setup.py
├── README.md
└── LICENSE
/deployer/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | stumptown_deployer.egg-info
2 | build/
3 | dist/
4 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1
3 |
4 | [flake8]
5 | max-line-length = 88
6 |
--------------------------------------------------------------------------------
/pypi-release.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Build the source and wheel distributions and upload them to PyPI.
set -eo pipefail

# `setup.py bdist_wheel` is provided by the `wheel` package, so install it
# together with twine; otherwise the build step fails in a fresh environment.
pip install twine wheel
# From https://pypi.org/project/twine/
# Start from an empty dist/ so that only this build's artifacts get uploaded.
rm -fr dist/
python setup.py sdist bdist_wheel
twine upload dist/*
9 |
--------------------------------------------------------------------------------
/.therapist.yml:
--------------------------------------------------------------------------------
1 | actions:
2 | black:
3 | run: black --check --diff {files}
4 | fix: black {files}
5 | include: "*.py"
6 |
7 | flake8:
8 | run: flake8 {files}
9 | include: "*.py"
10 |
--------------------------------------------------------------------------------
/deployer/exceptions.py:
--------------------------------------------------------------------------------
class CoreException(Exception):
    """Base class for this package's own errors.

    Exists so the CLI can catch one exception type and report it cleanly.
    """


class NoGitDirectory(CoreException):
    """When trying to find a/the git directory and failing."""


class CantDryRunError(CoreException):
    """When a dry run can't continue without performing a real side effect."""
7 |
--------------------------------------------------------------------------------
/.env-dist:
--------------------------------------------------------------------------------
1 | # If you're hacking on this locally and want to test the whole thing
2 | # outside of Mozilla's infra, you might want to change this setting to
3 | # something unique to you.
4 | #DEPLOYER_DEFAULT_BUCKET=yari
5 |
6 | #AWS_PROFILE=default
7 |
8 | #S3_DEFAULT_BUCKET_LOCATION=us-east-2
9 |
--------------------------------------------------------------------------------
/deployer/constants.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 | import os
4 |
5 | from decouple import AutoConfig
6 |
config = AutoConfig(os.curdir)


def _env_flag(name, default="0"):
    """Return the truthiness of the environment variable ``name``.

    The value is parsed as JSON first, so ``1``, ``0``, ``true`` and ``false``
    keep their previous meaning. A value that is not valid JSON (for example
    ``True``, ``yes`` or an empty string) falls back to a case-insensitive
    keyword match instead of raising at import time.
    """
    raw = os.environ.get(name, default)
    try:
        return bool(json.loads(raw))
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return raw.strip().lower() in ("true", "yes", "on", "y", "t")


# Name of the S3 bucket used when none is given on the command line.
DEFAULT_BUCKET = config("DEPLOYER_DEFAULT_BUCKET", default="yari")

# Template for the default site name; it is filled in with str.format().
DEFAULT_NAME_PATTERN = config(
    "DEPLOYER_DEFAULT_NAME_PATTERN", default="{username}-{branchname}"
)

AWS_PROFILE = config("AWS_PROFILE", default="default")

# E.g. us-east-1
S3_DEFAULT_BUCKET_LOCATION = config("S3_DEFAULT_BUCKET_LOCATION", default="")

# When uploading a bunch of files, the work is done in a thread pool.
# If you use too many "workers" it might saturate your network meaning it's
# slower.
MAX_WORKERS_PARALLEL_UPLOADS = config(
    "DEPLOYER_MAX_WORKERS_PARALLEL_UPLOADS", default=50, cast=int
)

# Number of seconds for regular files.
# E.g. /en-US/docs/Foo/Bar/index.html
DEFAULT_CACHE_CONTROL = config(
    "DEPLOYER_DEFAULT_CACHE_CONTROL", default=60 * 60, cast=int
)
# Number of seconds for files with a digest hash in their name.
# E.g. '2.02b14290.chunk.css'
HASHED_CACHE_CONTROL = config(
    "DEPLOYER_HASHED_CACHE_CONTROL", default=60 * 60 * 24 * 365, cast=int
)


# By default the progress bar is disabled when stdout is not a terminal or
# when the CI environment variable is truthy.
DEFAULT_NO_PROGRESS_BAR = config(
    "NO_PROGRESS_BAR",
    cast=bool,
    default=not sys.stdout.isatty() or _env_flag("CI"),
)
42 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from os import path
2 |
3 | from setuptools import find_packages, setup
4 |
_here = path.dirname(__file__)

# Read the long description with an explicit encoding and close the file
# handle deterministically instead of leaving it to the garbage collector.
with open(path.join(_here, "README.md"), encoding="utf-8") as _readme:
    _long_description = _readme.read()

dev_requirements = ["black==19.3b0", "flake8==3.7.8", "therapist"]

setup(
    name="stumptown-deployer",
    version="0.2.6",
    author="Mozilla MDN",
    url="https://github.com/mdn/stumptown-deployer",
    description="Deploying static Stumptown sites",
    long_description=_long_description,
    long_description_content_type="text/markdown",
    license="MPL 2.0",
    classifiers=[
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: Implementation :: CPython",
        "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
    ],
    # deployer/upload.py imports `dataclasses`, which is only in the standard
    # library from Python 3.7 onwards.
    python_requires=">=3.7",
    packages=find_packages(),
    include_package_data=True,
    zip_safe=False,
    install_requires=["boto3", "click", "PyGithub", "GitPython", "python-decouple"],
    extras_require={"dev": dev_requirements},
    entry_points="""
        [console_scripts]
        stumptown-deployer=deployer.main:cli
    """,
    setup_requires=[],
    tests_require=["pytest"],
    keywords="git github s3 boto3 stumptown mdn",
)
38 |
--------------------------------------------------------------------------------
/deployer/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 | import click
5 |
6 |
def error(*msg):
    """Echo all ``msg`` parts, joined by single spaces, in red."""
    text = " ".join(map(str, msg))
    click.echo(click.style(text, fg="red"))
10 |
11 |
def warning(*msg):
    """Echo all ``msg`` parts, joined by single spaces, in yellow."""
    text = " ".join(map(str, msg))
    click.echo(click.style(text, fg="yellow"))
15 |
16 |
def info(*msg):
    """Echo all ``msg`` parts, joined by single spaces, without a colour."""
    text = " ".join(map(str, msg))
    click.echo(click.style(text))
20 |
21 |
def success(*msg):
    """Echo all ``msg`` parts, joined by single spaces, in green."""
    text = " ".join(map(str, msg))
    click.echo(click.style(text, fg="green"))
25 |
26 |
def ppath(path: Path, current_dir=None):
    """Return ``path`` relative to ``current_dir`` when that is possible.

    ``current_dir`` defaults to ``os.curdir``. If ``path`` does not lie under
    ``current_dir`` the ``path`` argument is returned unchanged.
    """
    base = current_dir if current_dir else Path(os.curdir)
    try:
        relative = Path(path).relative_to(base)
    except ValueError:
        # FIXME: Would be nice if it could produce something like ../../other/dir
        return path
    return relative
35 |
36 |
def is_junk_file(file_path: Path):
    """Tell whether ``file_path`` is a ``.DS_Store`` file or its name ends in ``~``."""
    name = file_path.name
    return name == ".DS_Store" or name.endswith("~")
43 |
44 |
def fmt_size(bytes_):
    """Format a byte count as a short human-readable string.

    Values of at least 1 MiB are shown as ``MB`` and values of at least 1 KiB
    as ``KB``, both with one decimal; anything smaller is shown as whole bytes.
    The thresholds are inclusive so that exactly 1024 bytes reads ``1.0KB``
    rather than ``1024B``.
    """
    kib = 1024
    mib = kib * kib
    if bytes_ >= mib:
        return f"{bytes_ / kib / kib:.1f}MB"
    if bytes_ >= kib:
        return f"{bytes_ / kib:.1f}KB"
    return f"{int(bytes_)}B"
51 |
52 |
def fmt_seconds(seconds):
    """Format a duration given in seconds as a short human-readable string.

    Below one second the result is in milliseconds, from an hour upwards in
    hours and minutes, from a minute upwards in minutes and seconds, and
    otherwise in seconds with one decimal.
    """
    if seconds < 1:
        millis = seconds * 1000
        return f"{millis:.1f}ms"
    if seconds >= 60 * 60:
        hours, minutes = divmod(int(seconds / 60), 60)
        return f"{hours}h{minutes}m"
    if seconds >= 60:
        minutes, rest = divmod(int(seconds), 60)
        return f"{minutes}m{rest}s"
    return f"{seconds:.1f}s"
62 |
--------------------------------------------------------------------------------
/deployer/main.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import pkg_resources
3 | from pathlib import Path
4 |
5 | import click
6 |
7 | from .constants import (
8 | DEFAULT_NAME_PATTERN,
9 | DEFAULT_BUCKET,
10 | S3_DEFAULT_BUCKET_LOCATION,
11 | DEFAULT_NO_PROGRESS_BAR,
12 | )
13 | from .exceptions import CoreException
14 | from .upload import upload_site
15 | from .utils import error, info
16 |
17 |
def cli_wrap(fn):
    """Decorate ``fn`` so that a ``CoreException`` aborts the CLI cleanly.

    The exception's class name and its message are echoed, then
    ``click.Abort`` is raised. Any other exception propagates untouched.
    """

    @functools.wraps(fn)
    def inner(*args, **kwargs):
        try:
            fn(*args, **kwargs)
        except CoreException as exc:
            info(type(exc).__name__)
            error(str(exc))
            raise click.Abort

    return inner
29 |
30 |
@click.group()
@click.option("--debug/--no-debug", default=False)
@click.pass_context
def cli(ctx, debug):
    # Root command group. Makes sure the context carries a dict and records
    # the --debug flag in it. (Kept as a comment rather than a docstring
    # because click would use a docstring as the group's help text.)
    ctx.ensure_object(dict)
    ctx.obj.update(debug=debug)
37 |
38 |
@cli.command()
@click.pass_context
@cli_wrap
@click.option(
    "--bucket",
    default=DEFAULT_BUCKET,
    help=f"Name of the bucket (default {DEFAULT_BUCKET!r})",
)
@click.option(
    "--name", default=None, help=f"Name of the site (default {DEFAULT_NAME_PATTERN!r})"
)
@click.option(
    "--bucket-location",
    default=S3_DEFAULT_BUCKET_LOCATION,
    help=f"Location of the bucket (default {S3_DEFAULT_BUCKET_LOCATION!r})",
)
@click.option(
    "--refresh",
    default=False,
    help="Ignores checking if files exist already",
    show_default=True,
    is_flag=True,
)
@click.option(
    "--bucket-lifecycle-days",
    required=False,
    type=int,
    help=(
        "If specified, the number of days until uploaded objects are deleted. "
        "(Only applicable when buckets are created!)"
    ),
)
@click.option(
    "--dry-run",
    default=False,
    help="No actual uploading",
    show_default=True,
    is_flag=True,
)
@click.option(
    "--no-progress-bar",
    default=DEFAULT_NO_PROGRESS_BAR,
    help="Don't use an interactive progress bar",
    show_default=True,
    is_flag=True,
)
@click.argument("directory", type=click.Path())
def upload(ctx, directory, **kwargs):
    # Command body: abort when DIRECTORY does not exist, otherwise merge the
    # command-line options into the context object and hand both over to
    # upload_site(). (Kept as a comment rather than a docstring because click
    # would use a docstring as the command's help text.)
    if not Path(directory).exists():
        error(f"{directory} does not exist")
        raise click.Abort

    ctx.obj.update(kwargs)
    upload_site(directory, ctx.obj)
94 |
95 |
@cli.command()
@click.pass_context
def version(ctx):
    # Echo the version of the installed "stumptown-deployer" distribution.
    distribution = pkg_resources.get_distribution("stumptown-deployer")
    info(distribution.version)
100 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # stumptown-deployer
2 |
3 | Ship a Stumptown static site for web hosting.
4 |
5 | Don't tell anyone, but for now it's all AWS as the backend but that's an
6 | implementation detail that shouldn't prevent us from one day moving to Google Cloud
7 | Platform or Azure or Fastly.
8 |
9 | ## Limitations and caveats
10 |
11 | - Redirects - in the build directory we're supposed to have `/en-us/_redirects.txt`
12 |
13 | - Preferred names - file systems might not be allowed to call a folder a certain thing
14 | but that's not necessarily what we want the key to be called in S3.
15 |
16 | - GitHub integration
17 |
18 | ## How it works
19 |
20 | This project's goal is ultimately to take a big directory of files and upload them to
21 | S3. But there are some more advanced features, such as turning `_redirects.txt` files
22 | into S3 redirect keys. And there might be file system names that don't exactly match
23 | what we need the S3 key to be called. Also, the directory is bound to contain
24 | "junk" that should be omitted. For example, Yari produces `index.hash` files which
25 | are used to remember the checksum when it built the `index.html`.
26 |
27 | Generally, all deployments go into one and the same S3 bucket. But in that bucket
28 | you always have a "prefix" (aka. a root folder) which gets used by CloudFront so you
29 | can have N CloudFront distributions for 1 S3 bucket. For example, one prefix might
30 | be called `master` which'll be the production site. Another prefix might be
31 | `peterbe-pr12345`.
32 |
33 | So every deployment has a prefix (aka. the "name") which can be automatically
34 | generated based on the name of the current branch, which'd be known to something
35 | like TravisCI. The first thing it does is that it downloads a complete listing of
36 | every known key in the bucket under that prefix and each key's size. (That's all
37 | you get from `bucket.list_objects_v2`). Now, it starts to walk the local directory
38 | and for each _file_ it applies the following logic:
39 |
40 | - Does the S3 key _not_ exist at all? --> Upload brand new S3 key!
41 | - Does the S3 key _exist_?
42 | - Is the file size different from the S3 key size? --> Upload changed S3 key!
43 | - Is the file size exactly the same as the S3 key size? --> Download the
44 | S3 key's `Metadata->filehash`.
45 | - Is the hash exactly the same as the file's hash? --> Do nothing!
46 | - Is the hash different? --> Upload changed S3 key!
47 |
48 | When it uploads an S3 key, _always_ compute the local file's hash and include that
49 | as a piece of S3 key Metadata.
50 |
51 | ## Getting started
52 |
53 | You can install it globally or in a virtualenv environment. Whatever floats
54 | your boat.
55 |
56 | pip install stumptown-deployer
57 | stumptown-deployer --help
58 |
59 | Please refer to the [`boto3` documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration) with regards to configuring AWS access
60 | credentials.
61 |
62 | ## Goal
63 |
64 | To be dead-easy to use and powerful at the same time.
65 |
66 | ## Contributing
67 |
68 | Clone this repo then run:
69 |
70 | pip install -e ".[dev]"
71 |
72 | That should have installed the CLI `stumptown-deployer`
73 |
74 | stumptown-deployer --help
75 |
76 | If you wanna make a PR, make sure it's formatted with `black` and passes `flake8`.
77 |
78 | You can check that all files are `flake8` fine by running:
79 |
80 | flake8 deployer
81 |
82 | And to check that all files are formatted according to `black` run:
83 |
84 | black --check deployer
85 |
86 | All of the code style stuff can be simplified by installing `therapist`. It should
87 | get installed by default, but setting it up as a `git` `pre-commit` hook is optional.
88 | Here's how you set it up once:
89 |
90 | therapist install
91 |
92 | Now, next time you try to commit a `.py` file with a `black` or `flake8` violation
93 | it will remind you and block the commit. You can override it like this:
94 |
95 | git commit -a -m "I know what I'm doing" --no-verify
96 |
97 | To run _all_ code style and lint checkers you can also use `therapist` with:
98 |
99 | therapist run --use-tracked-files
100 |
101 | Some things can't be automatically fixed, but `black` violations can for example:
102 |
103 | therapist run --use-tracked-files --fix
104 |
105 | ## Contributing and using
106 |
107 | If you like to use the globally installed executable `stumptown-deployer`
108 | but don't want to depend on a new PyPI release for every change you want
109 | to try, use this:
110 |
111 | # If you use a virtualenv, deactivate it first
112 | deactivate
113 | # Use the global pip (or pip3) on your system
114 | pip3 install -e .
115 |
116 | If you do this, you can use this repo to install in your system.
117 |
--------------------------------------------------------------------------------
/deployer/upload.py:
--------------------------------------------------------------------------------
1 | import concurrent.futures
2 | import datetime
3 | import getpass
4 | import hashlib
5 | import mimetypes
6 | import shutil
7 | import os
8 | import re
9 | import time
10 | from dataclasses import dataclass
11 | from pathlib import Path
12 |
13 | import boto3
14 | import git
15 | from boto3.s3.transfer import TransferConfig
16 | from botocore.exceptions import ClientError
17 | from git.exc import InvalidGitRepositoryError
18 |
19 | from .constants import (
20 | AWS_PROFILE,
21 | DEFAULT_CACHE_CONTROL,
22 | DEFAULT_NAME_PATTERN,
23 | HASHED_CACHE_CONTROL,
24 | MAX_WORKERS_PARALLEL_UPLOADS,
25 | )
26 | from .exceptions import NoGitDirectory, CantDryRunError
27 | from .utils import fmt_seconds, fmt_size, info, is_junk_file, ppath, success, warning
28 |
# Matches a dot-delimited run of 8 to 32 lowercase hexadecimal characters
# anywhere in a name, e.g. the '.02b14290.' part of '2.02b14290.chunk.css'.
hashed_filename_regex = re.compile(r"\.[a-f0-9]{8,32}\.")
30 |
31 |
def _find_git_repo(start):
    """Return the ``git.Repo`` for ``start`` or for its nearest ancestor.

    ``start`` is made absolute first. A relative path such as ``build`` would
    otherwise climb to ``.``, whose parent is ``.`` again, and the search
    would never terminate. As before, the filesystem root itself is not tried.

    Raises:
        NoGitDirectory: if the root is reached without finding a repository.
    """
    current = Path(start).resolve()
    # At the root a path is its own parent; that is the stop condition.
    while current != current.parent:
        try:
            return git.Repo(current)
        except InvalidGitRepositoryError:
            current = current.parent
    raise NoGitDirectory
39 |
40 |
def _has_hashed_filename(fn):
    """Return the matches of ``hashed_filename_regex`` in the base name of ``fn``.

    The result is a list, so it is truthy exactly when the base name contains
    at least one match.
    """
    base_name = os.path.basename(fn)
    return hashed_filename_regex.findall(base_name)
43 |
44 |
@dataclass()
class UploadTask:
    """All the relevant information for doing an upload"""

    # The S3 key the file is uploaded as.
    key: str
    # The local file to upload.
    file_path: Path
    # Size of the local file in bytes.
    size: int
    # MD5 hex digest of the local file; filled in by set_file_hash().
    file_hash: str
    # Whether the stored hash must be compared before deciding to upload.
    needs_hash_check: bool

    def __repr__(self):
        return repr(self.key)

    def set_file_hash(self):
        """Compute the MD5 hex digest of ``file_path`` and store it in ``file_hash``.

        The file is read in fixed-size chunks so that a large file is never
        held in memory in its entirety.
        """
        chunk_size = 1024 * 1024
        digest = hashlib.md5()
        with open(self.file_path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        self.file_hash = digest.hexdigest()
61 |
62 |
def upload_site(directory, config):
    """Upload the files under ``directory`` to the configured S3 bucket.

    ``config`` is a dict of settings. The keys read here are ``name``,
    ``bucket``, ``bucket_location``, ``bucket_lifecycle_days``, ``dry_run``,
    ``debug``, ``refresh`` and ``no_progress_bar``. When ``name`` is empty it
    is generated from ``DEFAULT_NAME_PATTERN`` (current user, active git
    branch, today's date) and written back into ``config``.

    Junk files and files whose name starts with ``_`` are ignored. A file is
    queued for upload when its key is unknown or its size differs; when the
    size is equal it is queued for a hash comparison, unless its name carries
    a digest hash, in which case it is skipped.

    Raises:
        NoGitDirectory: no name was given and no git repository was found.
        ValueError: the prefix name is empty once dashes are removed.
        CantDryRunError: the bucket does not exist and ``dry_run`` is set.
    """
    if isinstance(directory, str):
        directory = Path(directory)
    if not config.get("name"):
        try:
            repo = _find_git_repo(directory)
        except NoGitDirectory:
            raise NoGitDirectory(
                f"From {directory} can't find its git root directory "
                "which is needed to supply a default branchname."
            )
        active_branch = repo.active_branch
        # Compare the branch's *name* (not the branch object) and read the
        # option under the key the CLI actually supplies.
        lifecycle_days = config.get("bucket_lifecycle_days")
        if active_branch.name == "master" and lifecycle_days:
            warning(
                f"Warning! You're setting a lifecycle_days "
                f"({lifecycle_days} days) on a build from a 'master' repo."
            )
        config["name"] = DEFAULT_NAME_PATTERN.format(
            username=getpass.getuser(),
            branchname=active_branch.name,
            date=datetime.datetime.utcnow().strftime("%Y%m%d"),
        )
    # A name made up of nothing but dashes and whitespace is no usable prefix.
    if not config["name"].replace("-", "").strip():
        raise ValueError("Empty prefix name")
    info(
        f"About to upload {ppath(directory)} to prefix {config['name']!r} "
        f"into bucket {config['bucket']!r}"
    )

    session = boto3.Session(profile_name=AWS_PROFILE)
    s3 = session.client("s3")

    # First make sure the bucket exists
    try:
        s3.head_bucket(Bucket=config["bucket"])
        info(f"Bucket {config['bucket']!r} exists")
    except ClientError as error:
        # If a client error is thrown, then check that it was a 404 error.
        # If it was a 404 error, then the bucket does not exist.
        if error.response["Error"]["Code"] != "404":
            print(error.response)
            raise

        # Needs to be created.
        bucket_config = {}
        if config["bucket_location"]:
            bucket_config["LocationConstraint"] = config["bucket_location"]
        if config["dry_run"]:
            raise CantDryRunError(
                f"The bucket ({config['bucket']}) doesn't exist and won't be created "
                "in dry-run mode. But it needs to exist to be able to find out "
                "what files already exist."
            )
        s3.create_bucket(
            Bucket=config["bucket"],
            ACL="public-read",
            CreateBucketConfiguration=bucket_config,
        )
        info(f"Bucket {config['bucket']!r} created")

        # NOTE(review): the lifecycle rule is applied only to a bucket that
        # was just created, matching the --bucket-lifecycle-days help text
        # ("Only applicable when buckets are created!") - confirm nesting.
        if config["bucket_lifecycle_days"]:
            # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.put_bucket_lifecycle_configuration
            # https://docs.aws.amazon.com/code-samples/latest/catalog/python-s3-put_bucket_lifecyle_configuration.py.html
            s3.put_bucket_lifecycle_configuration(
                Bucket=config["bucket"],
                LifecycleConfiguration={
                    "Rules": [
                        {
                            "Expiration": {"Days": config["bucket_lifecycle_days"]},
                            "Filter": {"Prefix": ""},
                            "Status": "Enabled",
                        }
                    ]
                },
            )
            info(
                f"Bucket lifecycle expiration of "
                f"{config['bucket_lifecycle_days']!r} days configured."
            )

    try:
        website_bucket = s3.get_bucket_website(Bucket=config["bucket"])
    except ClientError as error:
        if error.response["Error"]["Code"] != "NoSuchWebsiteConfiguration":
            raise
        # Define the website configuration
        website_configuration = {
            "ErrorDocument": {"Key": "404.html"},
            "IndexDocument": {"Suffix": "index.html"},
            "RoutingRules": [
                {
                    "Condition": {"KeyPrefixEquals": "/"},
                    "Redirect": {"ReplaceKeyWith": "index.html"},
                }
            ],
        }
        website_bucket = s3.put_bucket_website(
            Bucket=config["bucket"], WebsiteConfiguration=website_configuration
        )
        info(f"Created bucket website configuration for {config['bucket']!r}")

    if config["debug"]:
        info(f"Website bucket: {website_bucket!r}")

    # Maps each already uploaded S3 key to its listing entry.
    uploaded_already = {}

    if config["refresh"]:
        info("Refresh, so ignoring what was previously uploaded.")
    else:
        info(
            f"Gather complete list of existing uploads under prefix "
            f"{config['name']!r}..."
        )
        t0 = time.time()
        continuation_token = None
        while True:
            # Have to do this so that 'ContinuationToken' can be omitted if falsy
            list_kwargs = dict(Bucket=config["bucket"], Prefix=config["name"])
            if continuation_token:
                list_kwargs["ContinuationToken"] = continuation_token
            response = s3.list_objects_v2(**list_kwargs)
            for obj in response.get("Contents", []):
                uploaded_already[obj["Key"]] = obj
            if response["IsTruncated"]:
                continuation_token = response["NextContinuationToken"]
            else:
                break
        t1 = time.time()

        warning(
            f"{len(uploaded_already):,} files already uploaded "
            f"(took {fmt_seconds(t1 - t0)})."
        )

    # First pass over the directory: only count the candidate files, so that
    # the progress bar has a total to measure against.
    total_todo = 0
    t0 = time.time()
    for fp in pwalk(directory):
        if is_junk_file(fp):
            continue
        if fp.name.startswith("_"):
            continue
        total_todo += 1
    t1 = time.time()
    warning(
        f"{total_todo:,} files to be (maybe) uploaded "
        f"(took {fmt_seconds(t1 - t0)})."
    )

    transfer_config = TransferConfig()

    # Number of files that don't need to be uploaded because they are already
    # uploaded without a difference.
    skipped = 0

    # Number of files we deliberately chose to NOT upload. Or even attempt to.
    ignored = 0

    # Use this pattern in case there's a file without extension.
    # for fp in directory.glob("**/*"):
    #     if fp.is_dir():
    #         # E.g. /pl/Web/API/docs/WindowBase64.atob/ which is
    #         continue
    counts = {"uploaded": 0, "not_uploaded": 0}

    total_size = []
    total_time = []

    def update_uploaded_stats(stats):
        # Fold the result of one batch into the running totals and, unless
        # disabled, redraw the progress bar on the current terminal line.
        counts["uploaded"] += stats["counts"].get("uploaded")
        counts["not_uploaded"] += stats["counts"].get("not_uploaded")
        total_size.append(stats["total_size_uploaded"])
        total_time.append(stats["total_time"])
        if not config["no_progress_bar"]:
            done = counts["uploaded"] + counts["not_uploaded"]
            percentage = 100 * done / total_todo
            max_bar_width = shutil.get_terminal_size((80, 20)).columns
            bar_width = int(max_bar_width * done / total_todo)
            print(
                f"{done:,} of {total_todo:,}".ljust(20)
                + f"[{'▋' * bar_width:<{max_bar_width}}] "
                f"{percentage:.1f}%\r",
                end="",
            )

    total_count = 0
    batch = []

    if config["no_progress_bar"]:
        log = info
    else:
        # With a progress bar on screen, per-file lines go to a log file.
        current_log_file_name = "upload.log"
        info(f"Logging progress into {current_log_file_name}")

        def log(line):
            with open(current_log_file_name, "a") as f:
                f.write(f"{line}\n")

    T0 = time.time()
    for fp in pwalk(directory):
        if is_junk_file(fp):
            ignored += 1
            continue
        if fp.name.startswith("_"):
            ignored += 1
            continue
        # This assumes that it can saved in S3 as a key that is the filename.
        key_path = fp.relative_to(directory)
        # if key_path.name == "index.redirect":
        #     # Call these index.html when they go into S3
        #     key_path = key_path.parent / "index.html"
        key = f"{config['name']}/{key_path}"

        size = fp.stat().st_size
        # The file hash is left empty here and computed by the upload worker.
        task = UploadTask(key, fp, size, None, False)
        if key not in uploaded_already or uploaded_already[key]["Size"] != size:
            # No doubt! We definitely didn't have this before or it's definitely
            # different.
            batch.append(task)

        else:
            # At this point, the key exists and the size hasn't changed.
            # However, for some files, that's not conclusive. An 'index.html'
            # can have changed content while its size is exactly the same as
            # before. If this is the case, we're going to *maybe* upload it.
            # However, for files that are already digest hashed, we don't need
            # to bother checking.
            if _has_hashed_filename(key):
                skipped += 1
                continue
            else:
                task.needs_hash_check = True
                batch.append(task)

        if len(batch) >= 1000:
            # Fire off these
            update_uploaded_stats(
                _start_uploads(
                    s3,
                    config,
                    batch,
                    transfer_config,
                    log=log,
                    dry_run=config["dry_run"],
                )
            )
            total_count += len(batch)
            batch = []

    if batch:
        update_uploaded_stats(
            _start_uploads(
                s3, config, batch, transfer_config, log=log, dry_run=config["dry_run"]
            )
        )
        total_count += len(batch)

    T1 = time.time()
    success(
        f"{counts['uploaded']:,} files uploaded, "
        f"{counts['not_uploaded']:,} files didn't need to be uploaded."
    )
    info(f"Total thread-pool time: {fmt_seconds(sum(total_time))}")
    success(f"Uploaded {fmt_size(sum(total_size))}.")
    if config["dry_run"]:
        warning("Remember! In dry-run mode")
    success(f"Done in {fmt_seconds(T1 - T0)}.")
339 |
340 |
def _start_uploads(s3, config, batch, transfer_config, log=info, dry_run=False):
    """Run ``_upload_file_maybe`` for every task in ``batch`` on a thread pool.

    The pool has ``MAX_WORKERS_PARALLEL_UPLOADS`` workers and the bucket name
    is taken from ``config["bucket"]``.

    Returns a dict with:
        counts: ``{"uploaded": int, "not_uploaded": int}``
        took: wall-clock seconds spent on the whole batch
        total_time: sum of the seconds reported by the individual tasks
        total_size_uploaded: sum of ``task.size`` over the uploaded tasks
    """
    T0 = time.time()
    futures = {}
    total_threadpool_time = []
    counts = {"uploaded": 0, "not_uploaded": 0}
    total_size_uploaded = 0
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=MAX_WORKERS_PARALLEL_UPLOADS
    ) as executor:
        bucket_name = config["bucket"]
        for task in batch:
            # Remember which task each future belongs to.
            futures[
                executor.submit(
                    _upload_file_maybe,
                    s3,
                    task,
                    bucket_name,
                    transfer_config,
                    log=log,
                    dry_run=dry_run,
                )
            ] = task

        for future in concurrent.futures.as_completed(futures):
            # .result() re-raises any exception raised inside the worker.
            was_uploaded, took = future.result()
            task = futures[future]
            total_threadpool_time.append(took)
            if was_uploaded:
                counts["uploaded"] += 1
                # No printing here: per-file output goes through `log`, and
                # stray stdout lines would break the caller's progress bar.
                total_size_uploaded += task.size
            else:
                counts["not_uploaded"] += 1

    T1 = time.time()

    return {
        "counts": counts,
        "took": T1 - T0,
        "total_time": sum(total_threadpool_time),
        "total_size_uploaded": total_size_uploaded,
    }
383 |
384 |
def pwalk(start):
    """Recursively yield a ``Path`` for every non-directory entry below ``start``."""
    for entry in os.scandir(start):
        if not entry.is_dir():
            yield Path(entry)
            continue
        yield from pwalk(entry)
392 |
393 |
def _upload_file_maybe(s3, task, bucket_name, transfer_config, log=info, dry_run=False):
    """Upload ``task.file_path`` to ``task.key`` unless S3 already has the same content.

    When ``task.needs_hash_check`` is set, the ``filehash`` metadata of the
    existing S3 object is compared with the local file's hash and the upload
    is skipped if they are equal. With ``dry_run`` everything except the
    actual upload still happens.

    Returns a ``(was_uploaded, seconds_taken)`` tuple.
    """
    started = time.time()
    if not task.file_hash:
        task.set_file_hash()

    if task.needs_hash_check:
        try:
            remote = s3.head_object(Bucket=bucket_name, Key=task.key)
            if remote["Metadata"].get("filehash") == task.file_hash:
                # We can bail early!
                elapsed = time.time() - started
                summary = f"{fmt_size(task.size):} in {fmt_seconds(elapsed)}"
                log(f"Skipped {summary:>19} {task.key}")
                return False, elapsed
        except ClientError as error:
            # Anything but a 404 is a real problem. A 404 means the key does
            # not exist, so the listing gathered earlier is out of sync and
            # the file simply gets uploaded.
            if error.response["Error"]["Code"] != "404":
                raise

    guessed_type, _ = mimetypes.guess_type(str(task.file_path))
    mime_type = guessed_type or "binary/octet-stream"

    if os.path.basename(task.file_path) == "service-worker.js":
        cache_control = "no-cache"
    else:
        if _has_hashed_filename(task.file_path):
            max_age = HASHED_CACHE_CONTROL
        else:
            max_age = DEFAULT_CACHE_CONTROL
        cache_control = f"max-age={max_age}, public"

    extra_args = {
        "ACL": "public-read",
        "ContentType": mime_type,
        "CacheControl": cache_control,
        "Metadata": {"filehash": task.file_hash},
    }
    # if task.file_path.name == "index.redirect":
    #     with open(task.file_path) as f:
    #         redirect_url = f.read().strip()
    #     extra_args["WebsiteRedirectLocation"] = redirect_url
    if not dry_run:
        s3.upload_file(
            str(task.file_path),
            bucket_name,
            task.key,
            ExtraArgs=extra_args,
            Config=transfer_config,
        )
    elapsed = time.time() - started

    verb = "Updated" if task.needs_hash_check else "Uploaded"
    summary = f"{fmt_size(task.size)} in {fmt_seconds(elapsed)}"
    log(f"{verb} {summary:>20} {task.key}")
    return True, elapsed
449 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Mozilla Public License Version 2.0
2 | ==================================
3 |
4 | 1. Definitions
5 | --------------
6 |
7 | 1.1. "Contributor"
8 | means each individual or legal entity that creates, contributes to
9 | the creation of, or owns Covered Software.
10 |
11 | 1.2. "Contributor Version"
12 | means the combination of the Contributions of others (if any) used
13 | by a Contributor and that particular Contributor's Contribution.
14 |
15 | 1.3. "Contribution"
16 | means Covered Software of a particular Contributor.
17 |
18 | 1.4. "Covered Software"
19 | means Source Code Form to which the initial Contributor has attached
20 | the notice in Exhibit A, the Executable Form of such Source Code
21 | Form, and Modifications of such Source Code Form, in each case
22 | including portions thereof.
23 |
24 | 1.5. "Incompatible With Secondary Licenses"
25 | means
26 |
27 | (a) that the initial Contributor has attached the notice described
28 | in Exhibit B to the Covered Software; or
29 |
30 | (b) that the Covered Software was made available under the terms of
31 | version 1.1 or earlier of the License, but not also under the
32 | terms of a Secondary License.
33 |
34 | 1.6. "Executable Form"
35 | means any form of the work other than Source Code Form.
36 |
37 | 1.7. "Larger Work"
38 | means a work that combines Covered Software with other material, in
39 | a separate file or files, that is not Covered Software.
40 |
41 | 1.8. "License"
42 | means this document.
43 |
44 | 1.9. "Licensable"
45 | means having the right to grant, to the maximum extent possible,
46 | whether at the time of the initial grant or subsequently, any and
47 | all of the rights conveyed by this License.
48 |
49 | 1.10. "Modifications"
50 | means any of the following:
51 |
52 | (a) any file in Source Code Form that results from an addition to,
53 | deletion from, or modification of the contents of Covered
54 | Software; or
55 |
56 | (b) any new file in Source Code Form that contains any Covered
57 | Software.
58 |
59 | 1.11. "Patent Claims" of a Contributor
60 | means any patent claim(s), including without limitation, method,
61 | process, and apparatus claims, in any patent Licensable by such
62 | Contributor that would be infringed, but for the grant of the
63 | License, by the making, using, selling, offering for sale, having
64 | made, import, or transfer of either its Contributions or its
65 | Contributor Version.
66 |
67 | 1.12. "Secondary License"
68 | means either the GNU General Public License, Version 2.0, the GNU
69 | Lesser General Public License, Version 2.1, the GNU Affero General
70 | Public License, Version 3.0, or any later versions of those
71 | licenses.
72 |
73 | 1.13. "Source Code Form"
74 | means the form of the work preferred for making modifications.
75 |
76 | 1.14. "You" (or "Your")
77 | means an individual or a legal entity exercising rights under this
78 | License. For legal entities, "You" includes any entity that
79 | controls, is controlled by, or is under common control with You. For
80 | purposes of this definition, "control" means (a) the power, direct
81 | or indirect, to cause the direction or management of such entity,
82 | whether by contract or otherwise, or (b) ownership of more than
83 | fifty percent (50%) of the outstanding shares or beneficial
84 | ownership of such entity.
85 |
86 | 2. License Grants and Conditions
87 | --------------------------------
88 |
89 | 2.1. Grants
90 |
91 | Each Contributor hereby grants You a world-wide, royalty-free,
92 | non-exclusive license:
93 |
94 | (a) under intellectual property rights (other than patent or trademark)
95 | Licensable by such Contributor to use, reproduce, make available,
96 | modify, display, perform, distribute, and otherwise exploit its
97 | Contributions, either on an unmodified basis, with Modifications, or
98 | as part of a Larger Work; and
99 |
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 | for sale, have made, import, and otherwise transfer either its
102 | Contributions or its Contributor Version.
103 |
104 | 2.2. Effective Date
105 |
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 |
110 | 2.3. Limitations on Grant Scope
111 |
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 |
118 | (a) for any code that a Contributor has removed from Covered Software;
119 | or
120 |
121 | (b) for infringements caused by: (i) Your and any other third party's
122 | modifications of Covered Software, or (ii) the combination of its
123 | Contributions with other software (except as part of its Contributor
124 | Version); or
125 |
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 | its Contributions.
128 |
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 |
133 | 2.4. Subsequent Licenses
134 |
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 |
140 | 2.5. Representation
141 |
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 |
146 | 2.6. Fair Use
147 |
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 |
152 | 2.7. Conditions
153 |
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 |
157 | 3. Responsibilities
158 | -------------------
159 |
160 | 3.1. Distribution of Source Form
161 |
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 |
170 | 3.2. Distribution of Executable Form
171 |
172 | If You distribute Covered Software in Executable Form then:
173 |
174 | (a) such Covered Software must also be made available in Source Code
175 | Form, as described in Section 3.1, and You must inform recipients of
176 | the Executable Form how they can obtain a copy of such Source Code
177 | Form by reasonable means in a timely manner, at a charge no more
178 | than the cost of distribution to the recipient; and
179 |
180 | (b) You may distribute such Executable Form under the terms of this
181 | License, or sublicense it under different terms, provided that the
182 | license for the Executable Form does not attempt to limit or alter
183 | the recipients' rights in the Source Code Form under this License.
184 |
185 | 3.3. Distribution of a Larger Work
186 |
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 |
198 | 3.4. Notices
199 |
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 |
206 | 3.5. Application of Additional Terms
207 |
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 |
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 |
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 |
232 | 5. Termination
233 | --------------
234 |
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 |
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 |
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 |
261 | ************************************************************************
262 | * *
263 | * 6. Disclaimer of Warranty *
264 | * ------------------------- *
265 | * *
266 | * Covered Software is provided under this License on an "as is" *
267 | * basis, without warranty of any kind, either expressed, implied, or *
268 | * statutory, including, without limitation, warranties that the *
269 | * Covered Software is free of defects, merchantable, fit for a *
270 | * particular purpose or non-infringing. The entire risk as to the *
271 | * quality and performance of the Covered Software is with You. *
272 | * Should any Covered Software prove defective in any respect, You *
273 | * (not any Contributor) assume the cost of any necessary servicing, *
274 | * repair, or correction. This disclaimer of warranty constitutes an *
275 | * essential part of this License. No use of any Covered Software is *
276 | * authorized under this License except under this disclaimer. *
277 | * *
278 | ************************************************************************
279 |
280 | ************************************************************************
281 | * *
282 | * 7. Limitation of Liability *
283 | * -------------------------- *
284 | * *
285 | * Under no circumstances and under no legal theory, whether tort *
286 | * (including negligence), contract, or otherwise, shall any *
287 | * Contributor, or anyone who distributes Covered Software as *
288 | * permitted above, be liable to You for any direct, indirect, *
289 | * special, incidental, or consequential damages of any character *
290 | * including, without limitation, damages for lost profits, loss of *
291 | * goodwill, work stoppage, computer failure or malfunction, or any *
292 | * and all other commercial damages or losses, even if such party *
293 | * shall have been informed of the possibility of such damages. This *
294 | * limitation of liability shall not apply to liability for death or *
295 | * personal injury resulting from such party's negligence to the *
296 | * extent applicable law prohibits such limitation. Some *
297 | * jurisdictions do not allow the exclusion or limitation of *
298 | * incidental or consequential damages, so this exclusion and *
299 | * limitation may not apply to You. *
300 | * *
301 | ************************************************************************
302 |
303 | 8. Litigation
304 | -------------
305 |
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 |
313 | 9. Miscellaneous
314 | ----------------
315 |
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 |
323 | 10. Versions of the License
324 | ---------------------------
325 |
326 | 10.1. New Versions
327 |
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 |
333 | 10.2. Effect of New Versions
334 |
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 |
340 | 10.3. Modified Versions
341 |
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 |
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 |
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 |
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 |
358 | This Source Code Form is subject to the terms of the Mozilla Public
359 | License, v. 2.0. If a copy of the MPL was not distributed with this
360 | file, You can obtain one at http://mozilla.org/MPL/2.0/.
361 |
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 |
367 | You may add additional accurate notices of copyright ownership.
368 |
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 |
372 | This Source Code Form is "Incompatible With Secondary Licenses", as
373 | defined by the Mozilla Public License, v. 2.0.
374 |
--------------------------------------------------------------------------------