├── anaconda_verify
├── __init__.py
├── const.py
├── utils.py
├── main.py
├── common.py
├── recipe.py
└── package.py
├── TODO.txt
├── .gitignore
├── setup.py
├── FAQ.md
├── LICENSE.txt
├── CHANGELOG.txt
└── README.md
/anaconda_verify/__init__.py:
--------------------------------------------------------------------------------
# Package version; setup.py reads this via a regex on this file.
__version__ = '1.3.8'
2 |
--------------------------------------------------------------------------------
/TODO.txt:
--------------------------------------------------------------------------------
1 | TODO:
2 | * add file permission checking
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc
3 | *.egg-info
4 | .cache/
5 | build/
6 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# (c) 2016-2017 Continuum Analytics, Inc. / http://continuum.io
# All Rights Reserved
import ast
import re
from os.path import join

from distutils.core import setup


# read version from anaconda_verify/__init__.py
pat = re.compile(r'__version__\s*=\s*(\S+)', re.M)
with open(join('anaconda_verify', '__init__.py')) as fi:
    data = fi.read()
# literal_eval instead of eval: the captured text is a plain string
# literal, so there is no reason to execute it as arbitrary code
version = ast.literal_eval(pat.search(data).group(1))

with open('README.md') as fi:
    long_description = fi.read()

setup(
    name = "anaconda-verify",
    version = version,
    author = "Ilan Schnell",
    author_email = "ilan@continuum.io",
    url = "https://github.com/ContinuumIO/anaconda-verify",
    license = "BSD",
    description = "tool for validating conda recipes and conda packages",
    long_description = long_description,
    packages = ['anaconda_verify'],
)
--------------------------------------------------------------------------------
/FAQ.md:
--------------------------------------------------------------------------------
1 | I get "found namespace .pth file". What should I do?
2 | -----------------------------------------------------
3 |
4 | The essential problem with Python namespace packages is that they require
5 | overlapping `__init__.py` files, which are shared by different projects.
6 |
7 | The most straightforward way to handle namespace packages in conda is
8 | to create a package which defines the namespace, and which only contains a
9 | single empty `__init__.py` file, and then make the other packages depend
10 | on this.
11 | This is how we handle namespace packages in the Anaconda distribution.
12 | For example, the
13 |
14 | backports recipe defines the `backports` namespace, and then other
15 | packages, such as
16 |
17 | configparser depend on `backports`.
18 |
19 | There are other ways to get around this problem, e.g. by preserving the egg
20 | directory which setuptools creates, but that is not as simple and
21 | clean (from a packaging perspective), and brings along other challenges.
22 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016, Continuum Analytics, Inc.
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 | * Redistributions of source code must retain the above copyright
7 | notice, this list of conditions and the following disclaimer.
8 | * Redistributions in binary form must reproduce the above copyright
9 | notice, this list of conditions and the following disclaimer in the
10 | documentation and/or other materials provided with the distribution.
11 | * Neither the name of Continuum Analytics, Inc. nor the
12 | names of its contributors may be used to endorse or promote products
13 | derived from this software without specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL CONTINUUM ANALYTICS BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |
--------------------------------------------------------------------------------
/anaconda_verify/const.py:
--------------------------------------------------------------------------------
# Constants shared by the recipe and package validators.

# Accepted values for about/license_family in meta.yaml
# (about/license is used as a fallback -- see check_license_family).
LICENSE_FAMILIES = set("""
AGPL
GPL2
GPL3
LGPL
BSD
MIT
Apache
PSF
Public-Domain
Proprietary
Other
""".split())

# Allowed meta.yaml sections and the keys permitted in each section.
# In pedantic mode, any other section or key raises RecipeError.
FIELDS = {
    'package': {'name', 'version'},
    'source': {'fn', 'url', 'md5', 'sha1', 'sha256',
               'git_url', 'git_tag', 'git_branch',
               'patches', 'hg_url', 'hg_tag'},
    'build': {'features', 'track_features',
              'number', 'entry_points', 'osx_is_app', 'noarch',
              'preserve_egg_dir', 'win_has_prefix', 'no_link',
              'ignore_prefix_files', 'msvc_compiler', 'skip_compile_pyc',
              'detect_binary_files_with_prefix', 'script',
              'always_include_files', 'binary_relocation',
              'binary_has_prefix_files'},
    'requirements': {'build', 'run'},
    'app': {'entry', 'icon', 'summary', 'type', 'cli_opts'},
    'test': {'requires', 'commands', 'files', 'source_files', 'imports'},
    'about': {'license', 'license_url', 'license_family', 'license_file',
              'summary', 'description', 'home', 'doc_url', 'doc_source_url',
              'dev_url'},
    'extra': {'recipe-maintainers'},
}

# Leading magic bytes of object files, mapped to a type label;
# used by utils.get_object_type to classify archive members.
MAGIC_HEADERS = {
    '\xca\xfe\xba\xbe': 'MachO-universal',
    '\xce\xfa\xed\xfe': 'MachO-i386',
    '\xcf\xfa\xed\xfe': 'MachO-x86_64',
    '\xfe\xed\xfa\xce': 'MachO-ppc',
    '\xfe\xed\xfa\xcf': 'MachO-ppc64',
    'MZ\x90\x00': 'DLL',
    '\x7fELF': 'ELF',
}

# PE (COFF) machine-type field values mapped to architecture names.
DLL_TYPES = {
    0x0: 'UNKNOWN', 0x1d3: 'AM33', 0x8664: 'AMD64', 0x1c0: 'ARM',
    0xebc: 'EBC', 0x14c: 'I386', 0x200: 'IA64', 0x9041: 'M32R',
    0x266: 'MIPS16', 0x366: 'MIPSFPU', 0x466: 'MIPSFPU16', 0x1f0: 'POWERPC',
    0x1f1: 'POWERPCFP', 0x166: 'R4000', 0x1a2: 'SH3', 0x1a3: 'SH3DSP',
    0x1a6: 'SH4', 0x1a8: 'SH5', 0x1c2: 'THUMB', 0x169: 'WCEMIPSV2',
}
53 |
--------------------------------------------------------------------------------
/anaconda_verify/utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import collections
3 |
4 | from anaconda_verify.const import MAGIC_HEADERS, DLL_TYPES
5 |
6 |
7 |
def get_object_type(data):
    """Return a string describing the object-file type of `data`
    (e.g. "DLL AMD64", "MachO-x86_64", "ELF64"), or None when the
    leading bytes match no known magic header.

    Accepts `bytes` (Python 3) or `str` (Python 2).  The header table
    in const.py uses 8-bit str keys, so on Python 3 the input is
    decoded as latin-1 (a lossless byte<->char mapping) before lookup;
    the original code compared bytes against str and crashed on Py3.
    """
    if isinstance(data, bytes) and not isinstance(data, str):
        data = data.decode('latin-1')
    head = data[:4]
    if head not in MAGIC_HEADERS:
        return None
    lookup = MAGIC_HEADERS.get(head)
    if lookup == 'DLL':
        pos = data.find('PE\0\0')
        if pos < 0:
            return ""
        # machine type: little-endian 16-bit field right after "PE\0\0"
        i = ord(data[pos + 4]) + 256 * ord(data[pos + 5])
        # default to 'UNKNOWN' so an unlisted machine type cannot
        # cause a TypeError from "DLL " + None
        return "DLL " + DLL_TYPES.get(i, 'UNKNOWN')
    elif lookup.startswith('MachO'):
        return lookup
    elif lookup == 'ELF':
        # byte 4 of an ELF header is the class: 1 = 32-bit, 2 = 64-bit;
        # fall back to '' for a malformed class byte instead of crashing
        return "ELF" + {'\x01': '32', '\x02': '64'}.get(data[4], '')
23 |
24 |
def get_bad_seq(s):
    """Return the first disallowed two-character separator sequence
    found in `s`, or None if there is none.  Any adjacent mix of '-',
    '.' and '_' is rejected, except '__' which is fine.
    """
    bad_sequences = ('--', '-.', '-_',
                     '.-', '..', '._',
                     '_-', '_.')
    for candidate in bad_sequences:
        if candidate in s:
            return candidate
    return None
32 |
33 |
def all_ascii(data, allow_CR=False):
    """Return True if every byte of `data` is printable ASCII or an
    allowed line terminator: LF always, CR only when `allow_CR` is
    set.  Works on bytes (Python 3) and str (Python 2).
    """
    is_py2 = sys.version_info[0] == 2
    allowed_newlines = (10, 13) if allow_CR else (10,)
    for ch in data:
        code = ord(ch) if is_py2 else ch
        if 32 <= code < 127:
            continue
        if code not in allowed_newlines:
            return False
    return True
43 |
44 |
class memoized(object):
    """Decorator. Caches a function's return value each time it is called.
    If called later with the same arguments, the cached value is returned
    (not reevaluated).  Calls with unhashable arguments (e.g. a list)
    are forwarded to the function without caching.
    """
    def __init__(self, func):
        self.func = func
        self.cache = {}

    def __call__(self, *args):
        # The original `isinstance(args, collections.Hashable)` test was
        # always true: `args` is a tuple, and tuples count as Hashable
        # even when they contain unhashable items -- so a list argument
        # crashed on the dict lookup.  (collections.Hashable was also
        # removed in Python 3.10.)  EAFP is the reliable check.
        try:
            return self.cache[args]
        except KeyError:
            value = self.func(*args)
            self.cache[args] = value
            return value
        except TypeError:
            # uncacheable arguments (e.g. a list); better to not cache
            # than to blow up
            return self.func(*args)
65 |
# Ad-hoc smoke test: run `python utils.py` to exercise all_ascii.
if __name__ == '__main__':
    print(sys.version)
    print(all_ascii(b'Hello\x00'), all_ascii(b"Hello World!"))
69 |
--------------------------------------------------------------------------------
/anaconda_verify/main.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 |
3 | import sys
4 | from os.path import isfile, join
5 | from optparse import OptionParser
6 |
7 | from anaconda_verify.recipe import validate_recipe, RecipeError
8 | from anaconda_verify.package import validate_package, PackageError
9 |
10 |
def main():
    """Command-line entry point.

    Walks every path given on the command line: a directory holding a
    meta.yaml is validated as a recipe, a *.tar.bz2 file as a conda
    package, and anything else is skipped.
    """
    parser = OptionParser(
        usage="usage: %prog [options] ",
        description="tool for (passively) verifying conda recipes and conda "
                    "packages for the Anaconda distribution")
    parser.add_option('-e', "--exit",
                      help="on error exit",
                      action="store_true")
    parser.add_option('-p', "--pedantic",
                      action="store_true")
    parser.add_option('-q', "--quiet",
                      action="store_true")
    parser.add_option('-V', '--version',
                      help="display the version being used and exit",
                      action="store_true")

    options, arguments = parser.parse_args()
    verbose = not options.quiet

    if options.version:
        from anaconda_verify import __version__
        print('anaconda-verify version:', __version__)
        return

    for path in arguments:
        is_recipe = isfile(join(path, 'meta.yaml'))
        is_package = path.endswith('.tar.bz2')
        if not (is_recipe or is_package):
            if verbose:
                print("Ignoring: %s" % path)
            continue
        if verbose:
            print("==> %s <==" % path)
        if is_recipe:
            try:
                validate_recipe(path, options.pedantic)
            except RecipeError as e:
                sys.stderr.write("RecipeError: %s\n" % e)
                if options.exit:
                    sys.exit(1)
        else:
            try:
                validate_package(path, options.pedantic, verbose)
            except PackageError as e:
                sys.stderr.write("PackageError: %s\n" % e)
                if options.exit:
                    sys.exit(1)
63 |
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
66 |
--------------------------------------------------------------------------------
/CHANGELOG.txt:
--------------------------------------------------------------------------------
1 | 2017-06-16 1.3.8:
2 | -------------------
3 | * allow extra/recipe-maintainers
4 |
5 |
6 | 2017-05-25 1.3.7:
7 | -------------------
8 | * allow about/doc_source_url
9 |
10 |
11 | 2017-04-17 1.3.6:
12 | -------------------
13 | * fix Py3k bug
14 |
15 |
16 | 2017-04-12 1.3.5:
17 | -------------------
18 | * disallow noarch python recipes in pedantic mode
19 | * remove warning for space in archive file path
20 |
21 |
22 | 2017-04-03 1.3.4:
23 | -------------------
24 | * add check for Menu/.json filename to correspond to conda
25 | package name
26 | * add check for build string in pedantic mode
27 |
28 |
29 | 2017-02-12 1.3.3:
30 | -------------------
31 | * error on binary placeholders on Windows
32 | * disallow info/package_metadata.json (in pedantic mode)
33 |
34 |
35 | 2017-01-24 1.3.2:
36 | -------------------
37 | * add check for site-package location for Python packages
38 | * add check for non-ASCII in info/files, and tar members
39 | * allow build/binary_has_prefix_files
40 |
41 |
42 | 2016-12-22 1.3.1:
43 | -------------------
44 | * add checks for duplicate specs
45 | * allow build/allow_binary_relocation key
46 |
47 |
48 | 2016-12-10 1.3.0:
49 | -------------------
50 | * add license family check for packages
51 | * add checks for info/index.json
52 | * add checks for build and runtime requirements in recipes
53 | * allow build/skip_compile_pyc
54 |
55 |
56 | 2016-09-29 1.2.1:
57 | -------------------
58 | * fix description in setup.py
59 | * allow build/script
60 | * allow new noarch key
61 |
62 |
63 | 2016-07-17 1.2.0:
64 | -------------------
65 | * added --pedantic (-p) option to make anaconda-verify more useful for the
66 | wider community
67 | * add check for hard links in packages
68 | * check for commented selectors (--pedantic only)
69 | * improve name and version checking
70 |
71 |
72 | 2016-07-13 1.1.0:
73 | -------------------
74 | * add recipe size limit and disallow files with certain extensions
75 | * add more checking for info/has_prefix in conda packages, in particular
76 | ensure that the binary placeholder length is 255 bytes on Unix, as
77 | conda-build 2.0 will start using this value. We already use conda-build
78 | master for building packages on repo.continuum.io, and want to enforce
79 | 255 bytes for all new packages, in order to make the transition to
80 | conda-build easier for the community.
81 | * add check for header in build.sh
82 | * add checking for non-ASCII in info/index.json and info/has_prefix
83 |
84 |
85 | 2016-06-30 1.0.0:
86 | -------------------
87 | * initial release
88 |
--------------------------------------------------------------------------------
/anaconda_verify/common.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 |
4 | from anaconda_verify.utils import get_bad_seq
5 |
6 |
name_pat = re.compile(r'[a-z0-9_][a-z0-9_\-\.]*$')
def check_name(name):
    """Return an error message string if `name` is not a valid conda
    package name, otherwise None.
    """
    if not name:
        return "package name missing"
    name = str(name)
    ends_badly = name.endswith(('.', '-', '_'))
    if ends_badly or name_pat.match(name) is None:
        return "invalid package name '%s'" % name
    bad = get_bad_seq(name)
    if bad is not None:
        return "'%s' is not allowed in package name: '%s'" % (bad, name)
    return None
18 |
19 |
version_pat = re.compile(r'[\w\.]+$')

def check_version(ver):
    """Return an error message string if `ver` is not a valid package
    version, otherwise None.
    """
    if not ver:
        return "package version missing"
    ver = str(ver)
    if version_pat.match(ver) is None:
        return "invalid version '%s'" % ver
    for edge in ('_', '.'):
        if ver.startswith(edge) or ver.endswith(edge):
            return "version cannot start or end with '_' or '.': %s" % ver
    bad = get_bad_seq(ver)
    if bad:
        return "'%s' not allowed in version '%s'" % (bad, ver)
    return None


def check_build_string(build):
    """Return an error message string if `build` is not a valid build
    string (same character set as a version), otherwise None.
    """
    build = str(build)
    if version_pat.match(build) is None:
        return "invalid build string '%s'" % build
    return None
40 |
41 |
ver_spec_pat = re.compile(r'[\w\.,=!<>\*]+$')
def check_spec(spec):
    """Return an error message string if `spec` is not a valid
    requirement spec ("name [version [build]]"), otherwise None.
    """
    if not spec:
        return "spec missing"
    spec = str(spec)
    parts = spec.split()
    count = len(parts)
    if count == 0:
        return "empty spec '%s'" % spec
    if ver_spec_pat is not None and name_pat.match(parts[0]) is None:
        return "invalid name spec '%s'" % spec
    if count >= 2 and ver_spec_pat.match(parts[1]) is None:
        return "invalid version spec '%s'" % spec
    if count == 3 and version_pat.match(parts[1]) is None:
        return "invalid (pure) version spec '%s'" % spec
    if count > 3:
        return "invalid spec (too many parts) '%s'" % spec
    return None
60 |
61 |
def check_specs(specs):
    """Return an error message string if any spec in `specs` is
    invalid, or if the same package name appears more than once;
    otherwise None.
    """
    by_name = defaultdict(list)
    for spec in specs:
        err = check_spec(spec)
        if err:
            return err
        by_name[spec.split()[0]].append(spec)
    for pkg_specs in by_name.values():
        if len(pkg_specs) > 1:
            return "duplicate specs: %s" % pkg_specs
    return None
74 |
75 |
def check_build_number(bn):
    """Return an error message string if `bn` is not a non-negative
    integer, otherwise None (implicit).
    """
    if not (isinstance(bn, int) and bn >= 0):
        # fixed message: "interger" was a typo, and 0 is accepted, so
        # "non-negative" (not "positive") is the accurate term
        return "build number '%s' (not a non-negative integer)" % bn
79 |
80 |
def get_python_version_specs(specs):
    """
    Return the Python version (as a string "x.y") from a given list of
    specs.  If Python is not a dependency, or its version spec does not
    start with x.y, None is returned.
    """
    xy_pat = re.compile(r'(\d\.\d)')
    for raw in specs:
        parts = str(raw).split()
        if len(parts) < 2 or parts[0] != 'python':
            continue
        match = xy_pat.match(parts[1])
        if match:
            return match.group(1)
    return None
101 |
102 |
# Ad-hoc smoke test: run `python common.py <build_string>` directly.
if __name__ == '__main__':
    import sys
    print(check_spec('numpy 1.2'))
    print(check_build_number(3))
    #print(get_python_version_specs(sys.argv[1:]))
    print(sys.argv[1])
    print(check_build_string(sys.argv[1]))
110 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | anaconda-verify
2 | ===============
3 |
4 | This project is now somewhat deprecated, use
5 | conda-verify
6 | instead.
7 |
8 | anaconda-verify is a tool for (passively) verifying conda recipes and
9 | conda packages.
10 |
11 | All
12 | Anaconda recipes, as well as
13 | the Anaconda packages
14 | need to pass this tool before they are made publicly available.
15 |
16 | Using anaconda-verify:
17 |
18 | $ conda install anaconda-verify
19 | $ anaconda-verify -h
20 | $ anaconda-verify
21 |
22 |
23 | The purpose of this verification process is to ensure that recipes don't
24 | contain obvious bugs, and that the conda packages we distribute to millions
25 | of users meet our high quality standards.
26 |
27 | Historically, the conda packages which represent the Anaconda distribution
28 | were not created using `conda-build`, but an internal build system.
29 | In fact, `conda-build` started as a public fork of this internal system
30 | 3 years ago. At that point the Anaconda distribution had already been
31 | around for almost a year, and the only way to create conda packages
32 | was by using the internal system.
33 | While `conda-build` has made a lot of progress, the internal system basically
34 | stayed unchanged, because the needs on a system for building a distribution
35 | are quite different, and not driven by the community using `conda-build`
36 | for continuous integration and other language support (e.g. Perl, Lua), etc. .
37 | On the other hand, the internal system has been developed to support
38 | Anaconda distribution specific needs, such as MKL featured packages,
39 | source and license reference meta-data, and interoperability between
40 | collections of packages.
41 |
42 | In an effort to bridge the gap between our internal system and `conda-build`,
43 | we started using `conda-build` to create conda packages for the Anaconda
44 | distribution itself about one year ago.
45 | By now, more than 85% of the conda packages in the Anaconda distribution
46 | are created using `conda-build`.
47 | However, because of the different requirements mentioned above, we only allow
48 | certain features that `conda-build` offers.
49 | This also helps to keep
50 | the Anaconda
51 | recipes simple and maintainable, and functional with the rest of the
52 | internal system which reads meta-data from the recipes.
53 | This is why we require conda recipes to be valid according to this tool.
54 |
55 |
56 | Packages
57 | --------
58 |
59 | Another aspect of `anaconda-verify` is the ability to verify conda packages.
60 | These are the most important checks `anaconda-verify` performs on conda
61 | packages, and more importantly we explain why these checks are necessary
62 | or useful.
63 |
64 | * Ensure the content of `info/files` corresponds to the actual archived
65 | files in the tarball (except the ones in `info/`, obviously). This
66 | is important, because the files listed in `info/files` determine which
67 | files are linked into the conda environment. Any mismatch here would
68 | indicate either (i) the tarball contains files which are not getting
69 | linked anywhere or (ii) files which do not exist are attempted to get
70 | linked (which would result in an error).
71 |
72 | * Check for disallowed directories in the tarball. A conda package should
73 | not contain files in the following directories `conda-meta/`,
74 | `conda-bld/`, `pkgs/`, `pkgs32/` and `envs/`, because this would (for
75 | example) allow a conda package to modify another existing environment.
76 |
77 | * Make sure the `name`, `version` and `build` values exist in
78 | `info/index.json` and that they correspond to the actual filename.
79 |
80 | * Ensure there are no files with both `.bat` and `.exe` extension. For
81 | example, if you had `Scripts/foo.bat` and `Scripts/foo.exe` one would
82 | shadow the other, and this would become confusing which one is actually
83 | executed when the user types `foo`. Although this check is always done,
84 | it is only relevant on Windows.
85 |
86 | * Ensure no `easy-install.pth` file exists. These files would cause
87 | problems as they would overlap (two or more conda packages would
88 | contain a `easy-install.pth` file, which overwrite each other when
89 | installing the package).
90 |
91 | * Ensure no "easy install scripts" exists. These are entry point scripts
92 | which setuptools creates which are extremely brittle, and should by
93 | replaced (overwritten) by the simple entry points scripts `conda-build`
94 | offers (use `build/entry_points` in your `meta.yaml`).
95 |
96 | * Ensure that no `.pyd` or `.so` files have a `.py` file next to them.
97 | This is just confusing, as it is not obvious which one the Python
98 | interpreter will import. Under certain circumstances setuptools creates
99 | `.py` next to shared object files for obscure reasons.
100 |
101 | * For packages (other than `python`), ensure that `.pyc` are not in
102 | Python's standard library directory. This would happen when a `.pyc` file
103 | is missing from the standard library, and then created during the
104 | build process of another package.
105 |
106 | * Check for missing `.pyc` files. Missing `.pyc` files cause two types of
107 | problems: (i) When building new packages, they might get included in
108 | the new package. For example, when building scipy and numpy is missing
109 | `.pyc` files, then these (numpy `.pyc` files) get included in the scipy
110 | package (ii) There was a (buggy) Python release which would crash when
111 | `.pyc` files could not be written (due to file permissions).
112 |
113 | * Ensure Windows conda packages only contain object files which have the
114 | correct architecture. There was a bug in `conda-build` which would
115 | create `64-bit` entry point executables when building `32-bit` packages
116 | on a `64-bit` system.
117 |
118 | * Ensure that `site-packages` does not contain certain directories when
119 | building packages. For example, when you build `pandas` you don't
120 | want a `numpy`, `scipy` or `setuptools` directory to be contained in
121 | the `pandas` package. This would happen when the `pandas` build
122 | dependencies have missing `.pyc` files.
123 |
124 | Here is an example of running the tool on conda packages:
125 |
126 | $ anaconda-verify bitarray-0.8.1-py35_0.tar.bz2
127 | ==> /Users/ilan/aroot/tars64/bitarray-0.8.1-py35_0.tar.bz2 <==
128 | bitarray
129 |
130 | In this case all is fine, and we see that only the `bitarray` directory is
131 | created in `site-packages`.
132 |
--------------------------------------------------------------------------------
/anaconda_verify/recipe.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 |
3 | import os
4 | import re
5 | from os.path import basename, isfile, getsize, join
6 |
7 | import yaml
8 |
9 | from anaconda_verify.const import LICENSE_FAMILIES, FIELDS
10 | from anaconda_verify.utils import all_ascii, memoized
11 | from anaconda_verify.common import (check_name, check_version, check_specs,
12 | check_build_number)
13 |
14 | PEDANTIC = True
15 |
16 |
class RecipeError(Exception):
    """Raised when a conda recipe fails validation."""
    pass
19 |
20 |
def ns_cfg(cfg):
    """Build the selector namespace (the names usable inside ``[...]``
    selectors in meta.yaml) for one build configuration.

    `cfg` must contain 'plat' (e.g. 'linux-64') and integer 'PY' and
    'NPY' versions (e.g. 27, 111).
    """
    plat = cfg['plat']
    py = cfg['PY']
    np = cfg['NPY']
    for value in (py, np):
        assert isinstance(value, int), value
    namespace = {
        'nomkl': False,
        'debug': False,
        'linux': plat.startswith('linux-'),
        'linux32': plat == 'linux-32',
        'linux64': plat == 'linux-64',
        'armv7l': False,
        'arm': False,
        'ppc64le': False,
        'osx': plat.startswith('osx-'),
        'unix': plat.startswith(('linux-', 'osx-')),
        'win': plat.startswith('win-'),
        'win32': plat == 'win-32',
        'win64': plat == 'win-64',
        'x86': plat.endswith(('-32', '-64')),
        'x86_64': plat.endswith('-64'),
        'py': py,
        'py3k': 30 <= py < 40,
        'py2k': 20 <= py < 30,
        'py26': py == 26,
        'py27': py == 27,
        'py33': py == 33,
        'py34': py == 34,
        'py35': py == 35,
        'py36': py == 36,
        'np': np,
    }
    return namespace
54 |
55 |
sel_pat = re.compile(r'(.+?)\s*\[(.+)\]$')
def select_lines(data, namespace):
    """Apply meta.yaml line selectors (``stuff  [cond]``) to `data`,
    keeping a selector line only when `cond` evaluates to true in
    `namespace`.  Returns the filtered text, newline-terminated.

    In pedantic mode, a selector buried inside a comment (other than a
    whole-line comment) raises RecipeError.
    """
    kept = []
    for raw in data.splitlines():
        stripped = raw.rstrip()
        match = sel_pat.match(stripped)
        if match is None:
            kept.append(stripped)
            continue
        if PEDANTIC:
            content = match.group(1).strip()
            # error on comment, unless the whole line is a comment
            if '#' in content and not content.startswith('#'):
                raise RecipeError("found commented selector: %s" % stripped)
        if eval(match.group(2), namespace, {}):
            kept.append(match.group(1))
    return '\n'.join(kept) + '\n'
74 |
75 |
@memoized
def yamlize(data):
    """Parse a YAML document and return a dict (empty for an empty
    document).  Memoized, since the same meta.yaml text is parsed once
    per build configuration.
    """
    # safe_load instead of yaml.load: recipe text is untrusted input,
    # and plain yaml.load can construct arbitrary Python objects (it is
    # also deprecated without an explicit Loader in PyYAML >= 5.1)
    res = yaml.safe_load(data)
    # ensure the result is a dict
    if res is None:
        res = {}
    return res
83 |
84 |
def parse(data, cfg):
    """Apply selectors for configuration `cfg` (None means no selector
    pass) and parse the result as YAML, returning a fresh dict.
    """
    text = data if cfg is None else select_lines(data, ns_cfg(cfg))
    # copy the result: yamlize is memoized, so callers must not share
    # (and possibly mutate) the cached object
    return dict(yamlize(text))
90 |
91 |
def get_field(meta, field, default=None):
    """Look up 'section/key' in the two-level dict `meta`, returning
    `default` when the section or the key is missing or None.
    """
    section, key = field.split('/')
    section_data = meta.get(section)
    if section_data is None:
        section_data = {}
    value = section_data.get(key)
    return default if value is None else value
101 |
102 |
def check_requirements(meta):
    """Validate the requirements/build and requirements/run spec
    lists, raising RecipeError on the first problem found.
    """
    for section in ('requirements/build', 'requirements/run'):
        err = check_specs(get_field(meta, section, []))
        if err:
            raise RecipeError(err)
109 |
110 |
def check_license_family(meta):
    """In pedantic mode, ensure about/license_family (falling back to
    about/license) is one of the allowed LICENSE_FAMILIES; print the
    allowed values and raise RecipeError otherwise.
    """
    if not PEDANTIC:
        return
    family = get_field(meta, 'about/license_family',
                       get_field(meta, 'about/license'))
    if family in LICENSE_FAMILIES:
        return
    print("""\
Error: license_family is invalid: %s
Note that about/license_family falls back to about/license.
Allowed license families are:""" % family)
    for allowed in LICENSE_FAMILIES:
        print(" - %s" % allowed)
    raise RecipeError("wrong license family")
124 |
125 |
url_pat = re.compile(r'(ftp|http(s)?)://')
def check_url(url):
    """Raise RecipeError unless `url` starts with ftp://, http:// or
    https://.
    """
    if url_pat.match(url) is None:
        raise RecipeError("not a valid URL: %s" % url)
130 |
131 |
def check_about(meta):
    """Validate the about section: summary length, the URL-valued
    fields, and the license family.
    """
    summary = get_field(meta, 'about/summary')
    if summary and len(summary) > 80:
        msg = "summary exceeds 80 characters"
        if PEDANTIC:
            raise RecipeError(msg)
        print("Warning: %s" % msg)

    url_fields = ('about/home', 'about/dev_url', 'about/doc_url',
                  'about/license_url')
    for field in url_fields:
        value = get_field(meta, field)
        if value:
            check_url(value)

    check_license_family(meta)
148 |
149 |
hash_pat = {'md5': re.compile(r'[a-f0-9]{32}$'),
            'sha1': re.compile(r'[a-f0-9]{40}$'),
            'sha256': re.compile(r'[a-f0-9]{64}$')}
def check_source(meta):
    """Validate the source section: checksum formats (only when a
    source filename is given), the download URL, and that git_tag and
    git_branch are not both specified.
    """
    src = meta.get('source')
    if not src:
        return
    if src.get('fn'):
        for hash_type in ('md5', 'sha1', 'sha256'):
            hexdigest = src.get(hash_type)
            if hexdigest and not hash_pat[hash_type].match(hexdigest):
                raise RecipeError("invalid hash: %s" % hexdigest)
    url = src.get('url')
    if url:
        check_url(url)

    if src.get('git_url') and (src.get('git_tag') and src.get('git_branch')):
        raise RecipeError("cannot specify both git_branch and git_tag")
170 |
171 |
def validate_meta(meta):
    """Validate a parsed meta.yaml dict: section and key names (in
    pedantic mode), package name/version, build number, the noarch
    setting, and the requirements, about and source sections.  Raises
    RecipeError on the first problem found.
    """
    for section in meta:
        if PEDANTIC and section not in FIELDS:
            raise RecipeError("Unknown section: %s" % section)
        submeta = meta.get(section)
        if submeta is None:
            submeta = {}
        for key in submeta:
            # FIELDS[section] is safe here: when PEDANTIC is set, an
            # unknown section has already raised above; when it is not,
            # short-circuiting skips the lookup entirely
            if PEDANTIC and key not in FIELDS[section]:
                raise RecipeError("in section %r: unknown key %r" %
                                  (section, key))

    # basic identity checks; each helper returns an error string or None
    for res in [
        check_name(get_field(meta, 'package/name')),
        check_version(get_field(meta, 'package/version')),
        check_build_number(get_field(meta, 'build/number', 0)),
    ]:
        if res:
            raise RecipeError(res)

    if PEDANTIC and str(get_field(meta, 'build/noarch')).lower() == 'python':
        raise RecipeError("noarch python recipe not allowed in pedantic mode")

    check_requirements(meta)
    check_about(meta)
    check_source(meta)
198 |
199 |
def validate_files(recipe_dir, meta):
    """Check that every file referenced by test/files and
    source/patches exists inside the recipe directory; in pedantic
    mode, also reject relative paths escaping the recipe directory.
    """
    for field in 'test/files', 'source/patches':
        flst = get_field(meta, field)
        if not flst:
            continue
        for fn in flst:
            if PEDANTIC and fn.startswith('..'):
                # message fixed: previously read "outsite"
                raise RecipeError("path outside recipe: %s" % fn)
            path = join(recipe_dir, fn)
            if isfile(path):
                continue
            raise RecipeError("no such file '%s'" % path)
212 |
213 |
def iter_cfgs():
    """Yield every platform/Python/NumPy build configuration that
    recipes are validated against.
    """
    platforms = ('linux-64', 'linux-32', 'osx-64', 'win-32', 'win-64')
    for py in (27, 34, 35):
        for plat in platforms:
            yield dict(plat=plat, PY=py, NPY=111)
218 |
219 |
def dir_size(dir_path):
    """Return the total size in bytes of all files under `dir_path`
    (recursively).
    """
    total = 0
    for root, _dirs, files in os.walk(dir_path):
        for fn in files:
            total += getsize(join(root, fn))
    return total
223 |
224 |
def check_dir_content(recipe_dir):
    """Checks on the recipe directory itself: no binaries or archives
    (only tiny ones, for testing), a total-size limit, and a bash
    shebang in build.sh.  Outside pedantic mode, disallowed extensions
    only produce warnings and the size/shebang checks are skipped.
    """
    disallowed_extensions = (
        '.tar', '.tar.gz', '.tar.bz2', '.tar.xz',
        '.so', '.dylib', '.la', '.a', '.dll', '.pyd',
    )
    for root, unused_dirs, files in os.walk(recipe_dir):
        for fn in files:
            fn_lower = fn.lower()
            if fn_lower.endswith(disallowed_extensions):
                if PEDANTIC:
                    raise RecipeError("found: %s" % fn)
                else:
                    print("Warning: found: %s" % fn)
            path = join(root, fn)
            # only allow small archives for testing
            if (PEDANTIC and fn_lower.endswith(('.bz2', '.gz')) and
                    getsize(path) > 512):
                raise RecipeError("found: %s (too large)" % fn)

    # the icu recipe is exempt from the size and build.sh checks below
    if basename(recipe_dir) == 'icu':
        return

    # check total size of recipe directory (recursively)
    kb_size = dir_size(recipe_dir) / 1024
    kb_limit = 512
    if PEDANTIC and kb_size > kb_limit:
        raise RecipeError("recipe too large: %d KB (limit %d KB)" %
                          (kb_size, kb_limit))

    if PEDANTIC:
        try:
            with open(join(recipe_dir, 'build.sh'), 'rb') as fi:
                data = fi.read()
            if data and not data.decode('utf-8').startswith(('#!/bin/bash\n',
                                                             '#!/bin/sh\n')):
                raise RecipeError("not a bash script: build.sh")
        except IOError:
            # a recipe without build.sh is fine
            pass
263 |
264 |
def render_jinja2(recipe_dir):
    """Render the recipe's meta.yaml as a Jinja2 template and return
    the resulting text.
    """
    import jinja2

    loader = jinja2.ChoiceLoader([jinja2.FileSystemLoader(recipe_dir)])
    env = jinja2.Environment(loader=loader)
    return env.get_or_select_template('meta.yaml').render(environment=env)
272 |
273 |
def validate_recipe(recipe_dir, pedantic=True):
    """Validate the conda recipe in `recipe_dir` (its meta.yaml and
    the directory content) against every configuration yielded by
    iter_cfgs().  Raises RecipeError on the first problem found.

    Note: sets the module-level PEDANTIC flag as a side effect; all
    the helper checks in this module read it.
    """
    global PEDANTIC
    PEDANTIC = bool(pedantic)

    meta_path = join(recipe_dir, 'meta.yaml')
    with open(meta_path, 'rb') as fi:
        data = fi.read()
    if PEDANTIC and not all_ascii(data):
        raise RecipeError("non-ASCII in: %s" % meta_path)
    # recipes containing Jinja2 markup must be rendered before parsing
    if b'{{' in data:
        data = render_jinja2(recipe_dir)
    else:
        data = data.decode('utf-8')

    check_dir_content(recipe_dir)

    for cfg in iter_cfgs():
        meta = parse(data, cfg)
        validate_meta(meta)
        validate_files(recipe_dir, meta)
294 |
--------------------------------------------------------------------------------
/anaconda_verify/package.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 |
3 | import re
4 | import json
5 | import shlex
6 | import tarfile
7 | from os.path import basename
8 |
9 | from anaconda_verify.const import LICENSE_FAMILIES
10 | from anaconda_verify.utils import get_object_type, all_ascii, get_bad_seq
11 | from anaconda_verify.common import (check_name, check_version, check_specs,
12 | check_build_string, check_build_number,
13 | get_python_version_specs)
14 |
15 |
16 | PEDANTIC = True
17 |
18 |
class PackageError(Exception):
    """Raised when a conda package fails one of the validation checks."""
    pass
21 |
22 |
def dist_fn(fn):
    """Return the distribution name for a package filename, i.e. the
    filename with its archive extension stripped.

    Raises PackageError for disallowed character sequences or an
    unexpected extension.
    """
    seq = get_bad_seq(fn)
    if seq:
        raise PackageError("'%s' not allowed in file name '%s'" % (seq, fn))
    # check the longer extension first so '.tar.bz2' is not mistaken for '.tar'
    for ext in ('.tar.bz2', '.tar'):
        if fn.endswith(ext):
            return fn[:-len(ext)]
    raise PackageError("did not expect filename: %s" % fn)
32 |
33 |
class CondaPackageCheck(object):
    """Run a battery of validation checks against one conda package tarball.

    The constructor opens the tarball and parses info/index.json; the
    individual check methods each raise PackageError on a hard violation,
    or print a warning for soft issues.  The open tarfile is exposed as
    ``self.t`` and must be closed by the caller (see validate_package).
    """
    def __init__(self, path, verbose=False):
        # path: filesystem path of the .tar.bz2/.tar package being verified
        self.verbose = verbose
        # the tarball stays open for the lifetime of this object
        self.t = tarfile.open(path)
        self.dist = dist_fn(basename(path))
        # conda filename convention: <name>-<version>-<build>
        self.name, self.version, self.build = self.dist.rsplit('-', 2)
        paths = [m.path for m in self.t.getmembers()]
        self.paths = set(paths)
        if len(paths) != len(self.paths):
            raise PackageError("duplicate members")
        raw = self.t.extractfile('info/index.json').read()
        self.info = json.loads(raw.decode('utf-8'))
        self.win_pkg = bool(self.info['platform'] == 'win')
        # presumably the win_pkg flag relaxes the ASCII/newline check for
        # Windows packages -- confirm against all_ascii() in utils.py
        if not all_ascii(raw, self.win_pkg):
            raise PackageError("non-ASCII in: info/index.json")

    def check_members(self):
        """Reject archive member paths containing non-ASCII characters."""
        for m in self.t.getmembers():
            path = m.path
            if not all_ascii(path.encode('utf-8')):
                raise PackageError("non-ASCII path: %r" % path)

    def info_files(self):
        """Cross-check info/files against the actual tarball members."""
        raw = self.t.extractfile('info/files').read()
        if not all_ascii(raw, self.win_pkg):
            raise PackageError("non-ASCII in: info/files")
        lista = [p.strip() for p in raw.decode('utf-8').splitlines()]
        for p in lista:
            # the info/ metadata itself must never be listed in info/files
            if p.startswith('info/'):
                raise PackageError("Did not expect '%s' in info/files" % p)

        seta = set(lista)
        if len(lista) != len(seta):
            raise PackageError('info/files: duplicates')

        # members actually present, excluding metadata and directories
        listb = [m.path for m in self.t.getmembers()
                 if not (m.path.startswith('info/') or m.isdir())]
        setb = set(listb)
        if len(listb) != len(setb):
            raise PackageError("info_files: duplicate members")

        if seta == setb:
            return
        # print every mismatch before failing, to aid debugging
        for p in sorted(seta | setb):
            if p not in seta:
                print('%r not in info/files' % p)
            if p not in setb:
                print('%r not in tarball' % p)
        raise PackageError("info/files")


    def no_hardlinks(self):
        """Reject packages containing hardlinked members."""
        for m in self.t.getmembers():
            if m.islnk():
                raise PackageError('hardlink found: %s' % m.path)


    def not_allowed_files(self):
        """Reject paths that must never appear inside a package."""
        not_allowed = {'conda-meta', 'conda-bld',
                       'pkgs', 'pkgs32', 'envs'}
        not_allowed_dirs = tuple(x + '/' for x in not_allowed)
        for p in self.paths:
            if (p.startswith(not_allowed_dirs) or
                p in not_allowed or
                p.endswith('/.DS_Store') or
                p.endswith('~')):
                raise PackageError("directory or filename not allowed: "
                                   "%s" % p)
            # newer conda metadata files are rejected in pedantic mode
            if PEDANTIC and p in ('info/package_metadata.json',
                                  'info/link.json'):
                raise PackageError("file not allowed: %s" % p)

    def index_json(self):
        """Validate info/index.json: consistency with the filename, field
        values, dependency specs and (pedantic) the license family."""
        for varname in 'name', 'version', 'build':
            # index.json must agree with the values parsed from the filename
            if self.info[varname] != getattr(self, varname):
                raise PackageError("info/index.json for %s: %r != %r" %
                                   (varname, self.info[varname],
                                    getattr(self, varname)))
        # each check_* helper returns an error message, or a falsy value
        # on success
        lst = [
            check_name(self.info['name']),
            check_version(self.info['version']),
            check_build_number(self.info['build_number']),
        ]
        if PEDANTIC:
            lst.append(check_build_string(self.info['build']))
        for res in lst:
            if res:
                raise PackageError("info/index.json: %s" % res)

        depends = self.info.get('depends')
        if depends is None:
            raise PackageError("info/index.json: key 'depends' missing")
        res = check_specs(depends)
        if res:
            raise PackageError("info/index.json: %s" % res)

        if PEDANTIC:
            # fall back to the plain 'license' field when no family is given
            lf = self.info.get('license_family', self.info.get('license'))
            if lf not in LICENSE_FAMILIES:
                raise PackageError("wrong license family: %s" % lf)

    def no_bat_and_exe(self):
        """Reject a .bat and .exe pair sharing the same basename."""
        bats = {p[:-4] for p in self.paths if p.endswith('.bat')}
        exes = {p[:-4] for p in self.paths if p.endswith('.exe')}
        both = bats & exes
        if both:
            raise PackageError("Both .bat and .exe files: %s" % both)


    def _check_has_prefix_line(self, line):
        """Validate one line of info/has_prefix."""
        line = line.strip()
        try:
            # expected form: <placeholder> <mode> <path>
            placeholder, mode, f = [x.strip('"\'') for x in
                                    shlex.split(line, posix=False)]
        except ValueError:
            # short form: the whole line is the path; assume defaults
            placeholder, mode, f = '//', 'text', line

        if f not in self.paths:
            raise PackageError("info/has_prefix: target '%s' not in "
                               "package" % f)

        if mode == 'binary':
            if self.name == 'python':
                raise PackageError("binary placeholder not allowed in Python")
            if self.win_pkg:
                raise PackageError("binary placeholder not allowed on Windows")
            if PEDANTIC:
                print("WARNING: info/has_prefix: binary replace mode: %s" % f)
                return
            # NOTE(review): when PEDANTIC is true the branch above already
            # returned, so the `raise PackageError(msg)` below appears
            # unreachable -- confirm whether the length check was intended
            # to run before the pedantic early return.
            if len(placeholder) != 255:
                msg = ("info/has_prefix: binary placeholder not "
                       "255 bytes, but: %d" % len(placeholder))
                if PEDANTIC:
                    raise PackageError(msg)
                else:
                    print("Warning: %s" % msg)
        elif mode == 'text':
            pass
        else:
            raise PackageError("info/has_prefix: invalid mode")


    def has_prefix(self):
        """Validate info/has_prefix, when present."""
        for m in self.t.getmembers():
            if m.path != 'info/has_prefix':
                continue
            if self.win_pkg:
                print("WARNING: %s" % m.path)
            data = self.t.extractfile(m.path).read()
            if not all_ascii(data, self.win_pkg):
                raise PackageError("non-ASCII in: info/has_prefix")
            for line in data.decode('utf-8').splitlines():
                self._check_has_prefix_line(line)


    def warn_post_link(self):
        """Warn about (but do not reject) pre/post link/unlink scripts."""
        for p in self.paths:
            if p.endswith((
                '-post-link.sh', '-pre-link.sh', '-pre-unlink.sh',
                '-post-link.bat', '-pre-link.bat', '-pre-unlink.bat',
            )):
                print("WARNING: %s" % p)

    def no_setuptools(self):
        """Reject setuptools/easy-install artifacts (outside setuptools)."""
        for p in self.paths:
            if p.endswith('easy-install.pth'):
                raise PackageError("easy-install.pth file not allowed")

        if self.name in ('setuptools', 'distribute'):
            return
        for p in self.paths:
            # whitelisted dummy eggs shipped inside some test suites
            if p.endswith(('MyPyPa-0.1.0-py2.5.egg',
                           'mytestegg-1.0.0-py3.4.egg')):
                continue
            if (p.endswith('.egg') or
                'site-packages/pkg_resources' in p or
                'site-packages/__pycache__/pkg_resources' in p or
                p.startswith('bin/easy_install') or
                p.startswith('Scripts/easy_install')):
                raise PackageError("file '%s' not allowed" % p)

    def no_easy_install_script(self):
        """In pedantic mode, reject scripts generated by easy_install."""
        if not PEDANTIC:
            return
        for m in self.t.getmembers():
            if not m.name.startswith(('bin/', 'Scripts/')):
                continue
            if not m.isfile():
                continue
            # the marker appears near the top of generated scripts, so
            # reading the first 1024 bytes is enough
            data = self.t.extractfile(m.path).read(1024)
            if b'EASY-INSTALL-SCRIPT' in data:
                raise PackageError("easy install script found: %s" % m.name)

    def no_pth(self):
        """Reject namespace .pth files (pedantic); warn on other .pth."""
        for p in self.paths:
            if PEDANTIC and p.endswith('-nspkg.pth'):
                raise PackageError("found namespace .pth file '%s'" % p)
            if p.endswith('.pth'):
                print("WARNING: .pth file: %s" % p)

    def warn_pyo(self):
        """Warn about .pyo files (except in the python package itself)."""
        if self.name == 'python':
            return
        for p in self.paths:
            if p.endswith('.pyo'):
                print("WARNING: .pyo file: %s" % p)

    def no_py_next_so(self):
        """Warn when a .py/.pyc sits next to an extension module."""
        for p in self.paths:
            if p.endswith('.so'):
                root = p[:-3]
            elif p.endswith('.pyd'):
                root = p[:-4]
            else:
                continue
            for ext in '.py', '.pyc':
                if root + ext in self.paths:
                    print("WARNING: %s next to: %s" % (ext, p))

    def no_pyc_in_stdlib(self):
        """Reject .pyc files outside site-packages (i.e. in the stdlib),
        except for a few whitelisted packages."""
        if self.name in {'python', 'scons', 'conda-build', 'dbus'}:
            return
        for p in self.paths:
            if p.endswith('.pyc') and not 'site-packages' in p:
                raise PackageError(".pyc found in stdlib: %s" % p)

    def no_2to3_pickle(self):
        """Reject lib2to3 .pickle files (except in the python package)."""
        if self.name == 'python':
            return
        for p in self.paths:
            if ('lib2to3' in p and p.endswith('.pickle')):
                raise PackageError("found lib2to3 .pickle: %s" % p)

    def pyc_files(self):
        """For non-py3 builds, warn about .py files missing their .pyc.

        Unless verbose, only the first missing .pyc is reported.
        """
        if 'py3' in self.build:
            return
        for p in self.paths:
            if ('/site-packages/' not in p) or ('/port_v3/' in p):
                continue
            if p.endswith('.py') and (p + 'c') not in self.paths:
                print("WARNING: pyc missing for:", p)
                if not self.verbose:
                    return

    def menu_names(self):
        """In pedantic mode, require at most one Menu/*.json file, named
        after the package."""
        if not PEDANTIC:
            return
        menu_json_files = []
        for p in self.paths:
            if p.startswith('Menu/') and p.endswith('.json'):
                menu_json_files.append(p)
        if len(menu_json_files) == 0:
            pass
        elif len(menu_json_files) == 1:
            # strip the 'Menu/' prefix
            fn = menu_json_files[0][5:]
            if fn != '%s.json' % self.name:
                raise PackageError("wrong Menu json file name: %s" % fn)
        else:
            raise PackageError("too many Menu json files: %r" %
                               menu_json_files)

    def check_windows_arch(self):
        """On Windows packages, verify that .exe/.dll members match the
        architecture declared in info/index.json."""
        if self.name in ('python', 'conda-build', 'pip', 'xlwings',
                         'phantomjs', 'qt', 'graphviz', 'nsis', 'swig'):
            return
        if not self.win_pkg:
            return
        arch = self.info['arch']
        if arch not in ('x86', 'x86_64'):
            raise PackageError("Unrecognized Windows architecture: %s" %
                               arch)
        for m in self.t.getmembers():
            if not m.name.lower().endswith(('.exe', '.dll')):
                continue
            # the object type is determined from the file header, so the
            # first 4096 bytes suffice
            data = self.t.extractfile(m.path).read(4096)
            tp = get_object_type(data)
            if ((arch == 'x86' and tp != 'DLL I386') or
                (arch == 'x86_64' and tp != 'DLL AMD64')):
                raise PackageError("File %s has object type %s, but info/"
                                   "index.json arch is %s" %
                                   (m.name, tp, arch))

    def get_sp_location(self):
        """Return the expected site-packages path for this package's Python
        dependency, or '' when it does not depend on python."""
        py_ver = get_python_version_specs(self.info['depends'])
        if py_ver is None:
            return ''

        if self.win_pkg:
            return 'Lib/site-packages'
        else:
            return 'lib/python%s/site-packages' % py_ver

    def list_packages(self):
        """Inspect top-level site-packages entries: warn about misplaced
        ones and reject bundled numpy/scipy/setuptools artifacts."""
        sp_location = self.get_sp_location()
        pat = re.compile(r'site-packages/([^/]+)')
        res = set()
        for p in self.paths:
            m = pat.search(p)
            if m is None:
                continue
            if not p.startswith(sp_location):
                print("WARNING: found %s" % p)
            fn = m.group(1)
            # skip dist-info style entries and compiled files
            if '-' in fn or fn.endswith('.pyc'):
                continue
            res.add(fn)
        if self.verbose:
            for x in res:
                print('    %s' % x)
        for pkg_name in 'numpy', 'scipy':
            # other packages must not ship their own copy of numpy/scipy
            if self.name != pkg_name and pkg_name in res:
                raise PackageError("found %s" % pkg_name)
        if self.name not in ('setuptools', 'distribute', 'python'):
            for x in ('pkg_resources.py', 'setuptools.pth', 'easy_install.py',
                      'setuptools'):
                if x in res:
                    raise PackageError("found %s" % x)
351 |
352 |
def validate_package(path, pedantic=True, verbose=True):
    """Validate the conda package at `path`.

    Sets the module-level PEDANTIC flag and runs every CondaPackageCheck
    method against the package.  Raises PackageError on the first
    violation found.

    Fix: the tarfile was previously only closed on the success path; any
    check raising PackageError leaked the open file handle.  The checks
    now run inside try/finally so the tarball is always closed.
    """
    global PEDANTIC
    PEDANTIC = bool(pedantic)

    x = CondaPackageCheck(path, verbose)
    try:
        x.check_members()
        x.info_files()
        x.no_hardlinks()
        x.not_allowed_files()
        x.index_json()
        x.no_bat_and_exe()
        x.has_prefix()
        x.warn_post_link()
        x.no_setuptools()
        x.no_easy_install_script()
        x.no_pth()
        x.warn_pyo()
        x.no_py_next_so()
        x.no_pyc_in_stdlib()
        x.no_2to3_pickle()
        x.pyc_files()
        x.menu_names()
        x.check_windows_arch()
        x.list_packages()
    finally:
        # always release the tarball, even when a check raised
        x.t.close()
378 |
--------------------------------------------------------------------------------