├── anaconda_verify ├── __init__.py ├── const.py ├── utils.py ├── main.py ├── common.py ├── recipe.py └── package.py ├── TODO.txt ├── .gitignore ├── setup.py ├── FAQ.md ├── LICENSE.txt ├── CHANGELOG.txt └── README.md /anaconda_verify/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.3.8' 2 | -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | TODO: 2 | * add file permission checking 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.egg-info 4 | .cache/ 5 | build/ 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # (c) 2016-2017 Continuum Analytics, Inc. / http://continuum.io 2 | # All Rights Reserved 3 | import re 4 | from os.path import join 5 | 6 | from distutils.core import setup 7 | 8 | 9 | # read version from anaconda_verify/__init__.py 10 | pat = re.compile(r'__version__\s*=\s*(\S+)', re.M) 11 | data = open(join('anaconda_verify', '__init__.py')).read() 12 | version = eval(pat.search(data).group(1)) 13 | 14 | setup( 15 | name = "anaconda-verify", 16 | version = version, 17 | author = "Ilan Schnell", 18 | author_email = "ilan@continuum.io", 19 | url = "https://github.com/ContinuumIO/anaconda-verify", 20 | license = "BSD", 21 | description = "tool for validating conda recipes and conda packages", 22 | long_description = open('README.md').read(), 23 | packages = ['anaconda_verify'], 24 | ) 25 | -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- 1 | I get "found namespace .pth file". What should I do? 2 | ----------------------------------------------------- 3 | 4 | The essential problem with Python namespace packages is that they require 5 | overlapping `__init__.py` files, which are shared by different projects. 6 | 7 | The most straightforward way to handle namespace packages in conda is 8 | to create a package which defines the namespace , which only contains a 9 | single empty `__init__.py` file, and then make the other packages depend 10 | on this. 11 | This is how we handle namespace packages in the Anaconda distribution. 12 | For example, the 13 | 14 | backports recipe defines the `backports` namespace, and then other 15 | packages, such as 16 | 17 | configparser depend on `backports`. 18 | 19 | There are other ways to get around this problem, e.g. by preserving the egg 20 | directory which setuptools creates, but that is not as simple and 21 | clean (from a packaging perspective), and brings along other challenges. 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Continuum Analytics, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of Continuum Analytics, Inc. nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL CONTINUUM ANALYTICS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /anaconda_verify/const.py: -------------------------------------------------------------------------------- 1 | LICENSE_FAMILIES = set(""" 2 | AGPL 3 | GPL2 4 | GPL3 5 | LGPL 6 | BSD 7 | MIT 8 | Apache 9 | PSF 10 | Public-Domain 11 | Proprietary 12 | Other 13 | """.split()) 14 | 15 | FIELDS = { 16 | 'package': {'name', 'version'}, 17 | 'source': {'fn', 'url', 'md5', 'sha1', 'sha256', 18 | 'git_url', 'git_tag', 'git_branch', 19 | 'patches', 'hg_url', 'hg_tag'}, 20 | 'build': {'features', 'track_features', 21 | 'number', 'entry_points', 'osx_is_app', 'noarch', 22 | 'preserve_egg_dir', 'win_has_prefix', 'no_link', 23 | 'ignore_prefix_files', 'msvc_compiler', 'skip_compile_pyc', 24 | 'detect_binary_files_with_prefix', 'script', 25 | 'always_include_files', 'binary_relocation', 26 | 'binary_has_prefix_files'}, 27 | 'requirements': {'build', 'run'}, 28 | 'app': {'entry', 'icon', 'summary', 'type', 'cli_opts'}, 29 | 'test': {'requires', 'commands', 'files', 'source_files', 'imports'}, 30 | 'about': {'license', 'license_url', 'license_family', 'license_file', 31 | 'summary', 'description', 'home', 'doc_url', 'doc_source_url', 32 | 'dev_url'}, 33 | 'extra': {'recipe-maintainers'}, 34 | } 35 | 36 | MAGIC_HEADERS = { 37 | '\xca\xfe\xba\xbe': 'MachO-universal', 38 | '\xce\xfa\xed\xfe': 'MachO-i386', 39 | '\xcf\xfa\xed\xfe': 'MachO-x86_64', 40 | '\xfe\xed\xfa\xce': 'MachO-ppc', 41 | '\xfe\xed\xfa\xcf': 'MachO-ppc64', 42 | 'MZ\x90\x00': 'DLL', 43 | '\x7fELF': 'ELF', 44 | } 45 | 46 | DLL_TYPES = { 47 | 0x0: 'UNKNOWN', 0x1d3: 'AM33', 0x8664: 'AMD64', 0x1c0: 'ARM', 48 | 0xebc: 'EBC', 0x14c: 'I386', 0x200: 'IA64', 0x9041: 'M32R', 49 | 0x266: 'MIPS16', 0x366: 'MIPSFPU', 0x466: 'MIPSFPU16', 0x1f0: 'POWERPC', 50 | 0x1f1: 'POWERPCFP', 0x166: 'R4000', 0x1a2: 'SH3', 0x1a3: 'SH3DSP', 51 | 0x1a6: 'SH4', 0x1a8: 'SH5', 0x1c2: 'THUMB', 0x169: 'WCEMIPSV2', 52 | } 53 | -------------------------------------------------------------------------------- /anaconda_verify/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import collections 3 | 4 | from anaconda_verify.const import MAGIC_HEADERS, DLL_TYPES 5 | 6 | 7 | 8 | def get_object_type(data): 9 | head = data[:4] 10 | if head not in MAGIC_HEADERS: 11 | return None 12 | lookup = MAGIC_HEADERS.get(head) 13 | if lookup == 'DLL': 14 | pos = data.find('PE\0\0') 15 | if pos < 0: 16 | return "" 17 | i = ord(data[pos + 4]) + 256 * ord(data[pos + 5]) 18 | return "DLL " + DLL_TYPES.get(i) 19 | elif lookup.startswith('MachO'): 20 | return lookup 21 | elif lookup == 'ELF': 22 | return "ELF" + {'\x01': '32', '\x02': '64'}.get(data[4]) 23 | 24 | 25 | def get_bad_seq(s): 26 | for seq in ('--', '-.', '-_', 27 | '.-', '..', '._', 28 | '_-', '_.'): # but '__' is fine 29 | if seq in s: 30 | return seq 31 | return None 32 | 33 | 34 | def all_ascii(data, allow_CR=False): 35 | newline = [10] # LF 36 | if allow_CR: 37 | newline.append(13) # CF 38 | for c in data: 39 | n = ord(c) if sys.version_info[0] == 2 else c 40 | if not (n in newline or 32 <= n < 127): 41 | return False 42 | return True 43 | 44 | 45 | class memoized(object): 46 | """Decorator. Caches a function's return value each time it is called. 47 | If called later with the same arguments, the cached value is returned 48 | (not reevaluated). 49 | """ 50 | def __init__(self, func): 51 | self.func = func 52 | self.cache = {} 53 | def __call__(self, *args): 54 | if not isinstance(args, collections.Hashable): 55 | # uncacheable. a list, for instance. 56 | # better to not cache than blow up. 57 | return self.func(*args) 58 | if args in self.cache: 59 | return self.cache[args] 60 | else: 61 | value = self.func(*args) 62 | self.cache[args] = value 63 | return value 64 | 65 | 66 | if __name__ == '__main__': 67 | print(sys.version) 68 | print(all_ascii(b'Hello\x00'), all_ascii(b"Hello World!")) 69 | -------------------------------------------------------------------------------- /anaconda_verify/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import sys 4 | from os.path import isfile, join 5 | from optparse import OptionParser 6 | 7 | from anaconda_verify.recipe import validate_recipe, RecipeError 8 | from anaconda_verify.package import validate_package, PackageError 9 | 10 | 11 | def main(): 12 | p = OptionParser( 13 | usage="usage: %prog [options] ", 14 | description="tool for (passively) verifying conda recipes and conda " 15 | "packages for the Anaconda distribution") 16 | 17 | p.add_option('-e', "--exit", 18 | help="on error exit", 19 | action="store_true") 20 | 21 | p.add_option('-p', "--pedantic", 22 | action="store_true") 23 | 24 | p.add_option('-q', "--quiet", 25 | action="store_true") 26 | 27 | p.add_option('-V', '--version', 28 | help="display the version being used and exit", 29 | action="store_true") 30 | 31 | opts, args = p.parse_args() 32 | verbose = not opts.quiet 33 | if opts.version: 34 | from anaconda_verify import __version__ 35 | print('anaconda-verify version:', __version__) 36 | return 37 | 38 | for path in args: 39 | if isfile(join(path, 'meta.yaml')): 40 | if verbose: 41 | print("==> %s <==" % path) 42 | try: 43 | validate_recipe(path, opts.pedantic) 44 | except RecipeError as e: 45 | sys.stderr.write("RecipeError: %s\n" % e) 46 | if opts.exit: 47 | sys.exit(1) 48 | 49 | elif path.endswith('.tar.bz2'): 50 | if verbose: 51 | print("==> %s <==" % path) 52 | try: 53 | validate_package(path, opts.pedantic, verbose) 54 | except PackageError as e: 55 | sys.stderr.write("PackageError: %s\n" % e) 56 | if opts.exit: 57 | sys.exit(1) 58 | 59 | else: 60 | if verbose: 61 | print("Ignoring: %s" % path) 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /CHANGELOG.txt: -------------------------------------------------------------------------------- 1 | 2017-06-16 1.3.8: 2 | ------------------- 3 | * allow extra/recipe-maintainers 4 | 5 | 6 | 2017-05-25 1.3.7: 7 | ------------------- 8 | * allow about/doc_source_url 9 | 10 | 11 | 2017-04-17 1.3.6: 12 | ------------------- 13 | * fix Py3k bug 14 | 15 | 16 | 2017-04-12 1.3.5: 17 | ------------------- 18 | * disallow noarch python recipe not allowed in pedantic mode 19 | * remove warning for space in archive file path 20 | 21 | 22 | 2017-04-03 1.3.4: 23 | ------------------- 24 | * add check for Menu/.json filename to correspond to conda 25 | package name 26 | * add check for build string in pedantic mode 27 | 28 | 29 | 2017-02-12 1.3.3: 30 | ------------------- 31 | * error on binary placeholders Windows 32 | * disallow info/package_metadata.json (in pedantic mode) 33 | 34 | 35 | 2017-01-24 1.3.2: 36 | ------------------- 37 | * add check for site-package location for Python packages 38 | * add check got non-ASCII in info/files, and tar members 39 | * allow build/binary_has_prefix_files 40 | 41 | 42 | 2016-12-22 1.3.1: 43 | ------------------- 44 | * add checks for duplicate specs 45 | * allow build/allow_binary_relocation key 46 | 47 | 48 | 2016-12-10 1.3.0: 49 | ------------------- 50 | * add license family check for packages 51 | * add checks for info/index.json 52 | * add checks for build and runtime requirements in recipes 53 | * allow build/skip_compile_pyc 54 | 55 | 56 | 2016-09-29 1.2.1: 57 | ------------------- 58 | * fix description in setup.py 59 | * allow build/script 60 | * allow new noarch key 61 | 62 | 63 | 2016-07-17 1.2.0: 64 | ------------------- 65 | * added --pedantic (-p) option to make anaconda-verify more useful for the 66 | wider community 67 | * add check for hard links in packages 68 | * check for commented selectors (--pedantic only) 69 | * improve name and version checking 70 | 71 | 72 | 2016-07-13 1.1.0: 73 | ------------------- 74 | * add recipe size limit and disallow files with certain extensions 75 | * add more checking for info/has_prefix in conda packages, in particular 76 | ensure that the binary placeholder length is 255 bytes on Unix, as 77 | conda-build 2.0 will start using this value. We already use conda-build 78 | master for building packages on repo.continuum.io, and want to enforce 79 | 255 bytes for all new packages, in order to make the transition to 80 | conda-build easier for the community. 81 | * add check for header in build.sh 82 | * add checking for non-ASCII in info/index.json and info/has_prefix 83 | 84 | 85 | 2016-06-30 1.0.0: 86 | ------------------- 87 | * initial release 88 | -------------------------------------------------------------------------------- /anaconda_verify/common.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import defaultdict 3 | 4 | from anaconda_verify.utils import get_bad_seq 5 | 6 | 7 | name_pat = re.compile(r'[a-z0-9_][a-z0-9_\-\.]*$') 8 | def check_name(name): 9 | if not name: 10 | return "package name missing" 11 | name = str(name) 12 | if not name_pat.match(name) or name.endswith(('.', '-', '_')): 13 | return "invalid package name '%s'" % name 14 | seq = get_bad_seq(name) 15 | if seq: 16 | return "'%s' is not allowed in package name: '%s'" % (seq, name) 17 | return None 18 | 19 | 20 | version_pat = re.compile(r'[\w\.]+$') 21 | def check_version(ver): 22 | if not ver: 23 | return "package version missing" 24 | ver = str(ver) 25 | if not version_pat.match(ver): 26 | return "invalid version '%s'" % ver 27 | if ver.startswith(('_', '.')) or ver.endswith(('_', '.')): 28 | return "version cannot start or end with '_' or '.': %s" % ver 29 | seq = get_bad_seq(ver) 30 | if seq: 31 | return "'%s' not allowed in version '%s'" % (seq, ver) 32 | return None 33 | 34 | 35 | def check_build_string(build): 36 | build = str(build) 37 | if not version_pat.match(build): 38 | return "invalid build string '%s'" % build 39 | return None 40 | 41 | 42 | ver_spec_pat = re.compile(r'[\w\.,=!<>\*]+$') 43 | def check_spec(spec): 44 | if not spec: 45 | return "spec missing" 46 | spec = str(spec) 47 | parts = spec.split() 48 | nparts = len(parts) 49 | if nparts == 0: 50 | return "empty spec '%s'" % spec 51 | if not name_pat.match(parts[0]): 52 | return "invalid name spec '%s'" % spec 53 | if nparts >= 2 and not ver_spec_pat.match(parts[1]): 54 | return "invalid version spec '%s'" % spec 55 | if nparts == 3 and not version_pat.match(parts[1]): 56 | return "invalid (pure) version spec '%s'" % spec 57 | if len(parts) > 3: 58 | return "invalid spec (too many parts) '%s'" % spec 59 | return None 60 | 61 | 62 | def check_specs(specs): 63 | name_specs = defaultdict(list) 64 | for spec in specs: 65 | res = check_spec(spec) 66 | if res: 67 | return res 68 | name_specs[spec.split()[0]].append(spec) 69 | for name in name_specs: 70 | specs = name_specs[name] 71 | if len(specs) > 1: 72 | return "duplicate specs: %s" % specs 73 | return None 74 | 75 | 76 | def check_build_number(bn): 77 | if not (isinstance(bn, int) and bn >= 0): 78 | return "build number '%s' (not a positive interger)" % bn 79 | 80 | 81 | def get_python_version_specs(specs): 82 | """ 83 | Return the Python version (as a string "x.y") from a given list of specs. 84 | If Python is not a dependency, or if the version does not start with x.y, 85 | None is returned 86 | """ 87 | pat = re.compile(r'(\d\.\d)') 88 | for spec in specs: 89 | spec = str(spec) 90 | parts = spec.split() 91 | nparts = len(parts) 92 | if nparts < 2: 93 | continue 94 | name, version = parts[:2] 95 | if name != 'python': 96 | continue 97 | m = pat.match(version) 98 | if m: 99 | return m.group(1) 100 | return None 101 | 102 | 103 | if __name__ == '__main__': 104 | import sys 105 | print(check_spec('numpy 1.2')) 106 | print(check_build_number(3)) 107 | #print(get_python_version_specs(sys.argv[1:])) 108 | print(sys.argv[1]) 109 | print(check_build_string(sys.argv[1])) 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | anaconda-verify 2 | =============== 3 | 4 | This project is now somewhat deprecated, use 5 | conda-verify 6 | instead. 7 | 8 | anaconda-verify is a tool for (passively) verifying conda recipes and 9 | conda packages. 10 | 11 | All 12 | Anaconda recipes, as well as 13 | the Anaconda packages 14 | need to pass this tool before they are made publically available. 15 | 16 | Using anaconda-verify: 17 | 18 | $ conda install anaconda-verify 19 | $ anaconda-verify -h 20 | $ anaconda-verify 21 | 22 | 23 | The purpose of this verification process is to ensure that recipes don't 24 | contain obvious bugs, and that the conda packages we distribute to millions 25 | of users meet our high quality standards. 26 | 27 | Historically, the conda packages which represent the Anaconda distribution 28 | were not created using `conda-build`, but an internal build system. 29 | In fact, `conda-build` started as a public fork of this internal system 30 | 3 years ago. At that point the Anaconda distribution had already been 31 | around for almost a year, and the only way to create conda packages 32 | was by using the internal system. 33 | While `conda-build` has made a lot of progress, the internal system basically 34 | stayed unchanged, because the needs on a system for building a distribution 35 | are quite different, and not driven by the community using `conda-build` 36 | for continuous integration and other language support (e.g. Perl, Lua), etc. . 37 | On the other hand, the internal system has been developed to support 38 | Anaconda distribution specific needs, such as MKL featured packages, 39 | source and license reference meta-data, and interoperability between 40 | collections of packages. 41 | 42 | In an effort to bridge the gap between our internal system and `conda-build`, 43 | we started using `conda-build` to create conda packages for the Anaconda 44 | distribution itself about one year ago. 45 | By now, more than 85% of the conda packages in the Anaconda distribution 46 | are created using `conda-build`. 47 | However, because the different requirements mentioned above, we only allow 48 | certain features that `conda-build` offers. 49 | This also helps to keep 50 | the Anaconda 51 | recipes simple and maintainable, and functional with the rest of the 52 | internal system which reads meta-data from the recipes. 53 | This is why we require conda recipes to be valid according to this tool. 54 | 55 | 56 | Packages 57 | -------- 58 | 59 | Another aspect of `anaconda-verify` is the ability to verify conda packages. 60 | These are the most important checks `anaconda-verify` performs on conda 61 | packages, and more importantly we explain why these checks are necessary 62 | or useful. 63 | 64 | * Ensure the content of `info/files` corresponds to the actual archived 65 | files in the tarball (except the ones in `info/`, obviously). This 66 | is important, because the files listed in `info/files` determine which 67 | files are linked into the conda environment. Any mismatch here would 68 | indicate either (i) the tarball contains files which are not getting 69 | linked anywhere or (ii) files which do no exist are attempted to get 70 | linked (which would result in an error). 71 | 72 | * Check for now allowed archives in the tarball. A conda package should 73 | not contain files in the following directories `conda-meta/`, 74 | `conda-bld/`, `pkgs/`, `pkgs32/` and `envs/`, because this would (for 75 | example) allow a conda package to modify another existing environment. 76 | 77 | * Make sure the `name`, `version` and `build` values exist in 78 | `info/index.json` and that they correspond to the actual filename. 79 | 80 | * Ensure there are no files with both `.bat` and `.exe` extension. For 81 | example, if you had `Scripts/foo.bat` and `Scripts/foo.exe` one would 82 | shadow the other, and this would become confusing which one is actually 83 | executed when the user types `foo`. Although this check is always done, 84 | it is only relevant on Windows. 85 | 86 | * Ensure no `easy-install.pth` file exists. These files would cause 87 | problems as they would overlap (two or more conda packages would 88 | contain a `easy-install.pth` file, which overwrite each other when 89 | installing the package). 90 | 91 | * Ensure no "easy install scripts" exists. These are entry point scripts 92 | which setuptools creates which are extremely brittle, and should by 93 | replaced (overwritten) by the simple entry points scripts `conda-build` 94 | offers (use `build/entry_points` in your `meta.yaml`). 95 | 96 | * Ensure there are no `.pyd` or `.so` files have a `.py` file next to it. 97 | This is just confusing, as it is not obvious which one the Python 98 | interpreter will import. Under certain circumstances setuptools creates 99 | `.py` next to shared object files for obscure reasons. 100 | 101 | * For packages (other than `python`), ensure that `.pyc` are not in 102 | Python's standard library directory. This would happen when a `.pyc` file 103 | is missing from the standard library, and then created during the 104 | build process of another package. 105 | 106 | * Check for missing `.pyc` files. Missing `.pyc` files cause two types of 107 | problems: (i) When building new packages, they might get included in 108 | the new package. For example, when building scipy and numpy is missing 109 | `.pyc` files, then these (numpy `.pyc` files) get included in the scipy 110 | package (ii) There was a (buggy) Python release which would crash when 111 | `.pyc` files could not written (due to file permissions). 112 | 113 | * Ensure Windows conda packages only contain object files which have the 114 | correct architecture. There was a bug in `conda-build` which would 115 | create `64-bit` entry point executables when building `32-bit` packages 116 | on a `64-bit` system. 117 | 118 | * Ensure that `site-packages` does not contain certain directories when 119 | building packages. For example, when you build `pandas` you don't 120 | want a `numpy`, `scipy` or `setuptools` directory to be contained in 121 | the `pandas` package. This would happen when the `pandas` build 122 | dependencies have missing `.pyc` files. 123 | 124 | Here is an example of running the tool on conda packages: 125 | 126 | $ anaconda-verify bitarray-0.8.1-py35_0.tar.bz2 127 | ==> /Users/ilan/aroot/tars64/bitarray-0.8.1-py35_0.tar.bz2 <== 128 | bitarray 129 | 130 | In this case all is fine, and we see that only the `bitarray` directory is 131 | created in `site-packages`. 132 | -------------------------------------------------------------------------------- /anaconda_verify/recipe.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import os 4 | import re 5 | from os.path import basename, isfile, getsize, join 6 | 7 | import yaml 8 | 9 | from anaconda_verify.const import LICENSE_FAMILIES, FIELDS 10 | from anaconda_verify.utils import all_ascii, memoized 11 | from anaconda_verify.common import (check_name, check_version, check_specs, 12 | check_build_number) 13 | 14 | PEDANTIC = True 15 | 16 | 17 | class RecipeError(Exception): 18 | pass 19 | 20 | 21 | def ns_cfg(cfg): 22 | plat = cfg['plat'] 23 | py = cfg['PY'] 24 | np = cfg['NPY'] 25 | for x in py, np: 26 | assert isinstance(x, int), x 27 | return dict( 28 | nomkl = False, 29 | debug = False, 30 | linux = plat.startswith('linux-'), 31 | linux32 = bool(plat == 'linux-32'), 32 | linux64 = bool(plat == 'linux-64'), 33 | armv7l = False, 34 | arm = False, 35 | ppc64le = False, 36 | osx = plat.startswith('osx-'), 37 | unix = plat.startswith(('linux-', 'osx-')), 38 | win = plat.startswith('win-'), 39 | win32 = bool(plat == 'win-32'), 40 | win64 = bool(plat == 'win-64'), 41 | x86 = plat.endswith(('-32', '-64')), 42 | x86_64 = plat.endswith('-64'), 43 | py = py, 44 | py3k = bool(30 <= py < 40), 45 | py2k = bool(20 <= py < 30), 46 | py26 = bool(py == 26), 47 | py27 = bool(py == 27), 48 | py33 = bool(py == 33), 49 | py34 = bool(py == 34), 50 | py35 = bool(py == 35), 51 | py36 = bool(py == 36), 52 | np = np, 53 | ) 54 | 55 | 56 | sel_pat = re.compile(r'(.+?)\s*\[(.+)\]$') 57 | def select_lines(data, namespace): 58 | lines = [] 59 | for line in data.splitlines(): 60 | line = line.rstrip() 61 | m = sel_pat.match(line) 62 | if m: 63 | if PEDANTIC: 64 | x = m.group(1).strip() 65 | # error on comment, unless the whole line is a comment 66 | if '#' in x and not x.startswith('#'): 67 | raise RecipeError("found commented selector: %s" % line) 68 | cond = m.group(2) 69 | if eval(cond, namespace, {}): 70 | lines.append(m.group(1)) 71 | continue 72 | lines.append(line) 73 | return '\n'.join(lines) + '\n' 74 | 75 | 76 | @memoized 77 | def yamlize(data): 78 | res = yaml.load(data) 79 | # ensure the result is a dict 80 | if res is None: 81 | res = {} 82 | return res 83 | 84 | 85 | def parse(data, cfg): 86 | if cfg is not None: 87 | data = select_lines(data, ns_cfg(cfg)) 88 | # ensure we create new object, because yamlize is memoized 89 | return dict(yamlize(data)) 90 | 91 | 92 | def get_field(meta, field, default=None): 93 | section, key = field.split('/') 94 | submeta = meta.get(section) 95 | if submeta is None: 96 | submeta = {} 97 | res = submeta.get(key) 98 | if res is None: 99 | res = default 100 | return res 101 | 102 | 103 | def check_requirements(meta): 104 | for field in 'requirements/build', 'requirements/run': 105 | specs = get_field(meta, field, []) 106 | res = check_specs(specs) 107 | if res: 108 | raise RecipeError(res) 109 | 110 | 111 | def check_license_family(meta): 112 | if not PEDANTIC: 113 | return 114 | lf = get_field(meta, 'about/license_family', 115 | get_field(meta, 'about/license')) 116 | if lf not in LICENSE_FAMILIES: 117 | print("""\ 118 | Error: license_family is invalid: %s 119 | Note that about/license_family falls back to about/license. 120 | Allowed license families are:""" % lf) 121 | for x in LICENSE_FAMILIES: 122 | print(" - %s" % x) 123 | raise RecipeError("wrong license family") 124 | 125 | 126 | url_pat = re.compile(r'(ftp|http(s)?)://') 127 | def check_url(url): 128 | if not url_pat.match(url): 129 | raise RecipeError("not a valid URL: %s" % url) 130 | 131 | 132 | def check_about(meta): 133 | summary = get_field(meta, 'about/summary') 134 | if summary and len(summary) > 80: 135 | msg = "summary exceeds 80 characters" 136 | if PEDANTIC: 137 | raise RecipeError(msg) 138 | else: 139 | print("Warning: %s" % msg) 140 | 141 | for field in ('about/home', 'about/dev_url', 'about/doc_url', 142 | 'about/license_url'): 143 | url = get_field(meta, field) 144 | if url: 145 | check_url(url) 146 | 147 | check_license_family(meta) 148 | 149 | 150 | hash_pat = {'md5': re.compile(r'[a-f0-9]{32}$'), 151 | 'sha1': re.compile(r'[a-f0-9]{40}$'), 152 | 'sha256': re.compile(r'[a-f0-9]{64}$')} 153 | def check_source(meta): 154 | src = meta.get('source') 155 | if not src: 156 | return 157 | fn = src.get('fn') 158 | if fn: 159 | for ht in 'md5', 'sha1', 'sha256': 160 | hexgigest = src.get(ht) 161 | if hexgigest and not hash_pat[ht].match(hexgigest): 162 | raise RecipeError("invalid hash: %s" % hexgigest) 163 | url = src.get('url') 164 | if url: 165 | check_url(url) 166 | 167 | git_url = src.get('git_url') 168 | if git_url and (src.get('git_tag') and src.get('git_branch')): 169 | raise RecipeError("cannot specify both git_branch and git_tag") 170 | 171 | 172 | def validate_meta(meta): 173 | for section in meta: 174 | if PEDANTIC and section not in FIELDS: 175 | raise RecipeError("Unknown section: %s" % section) 176 | submeta = meta.get(section) 177 | if submeta is None: 178 | submeta = {} 179 | for key in submeta: 180 | if PEDANTIC and key not in FIELDS[section]: 181 | raise RecipeError("in section %r: unknown key %r" % 182 | (section, key)) 183 | 184 | for res in [ 185 | check_name(get_field(meta, 'package/name')), 186 | check_version(get_field(meta, 'package/version')), 187 | check_build_number(get_field(meta, 'build/number', 0)), 188 | ]: 189 | if res: 190 | raise RecipeError(res) 191 | 192 | if PEDANTIC and str(get_field(meta, 'build/noarch')).lower() == 'python': 193 | raise RecipeError("noarch python recipe not allowed in pedantic mode") 194 | 195 | check_requirements(meta) 196 | check_about(meta) 197 | check_source(meta) 198 | 199 | 200 | def validate_files(recipe_dir, meta): 201 | for field in 'test/files', 'source/patches': 202 | flst = get_field(meta, field) 203 | if not flst: 204 | continue 205 | for fn in flst: 206 | if PEDANTIC and fn.startswith('..'): 207 | raise RecipeError("path outsite recipe: %s" % fn) 208 | path = join(recipe_dir, fn) 209 | if isfile(path): 210 | continue 211 | raise RecipeError("no such file '%s'" % path) 212 | 213 | 214 | def iter_cfgs(): 215 | for py in 27, 34, 35: 216 | for plat in 'linux-64', 'linux-32', 'osx-64', 'win-32', 'win-64': 217 | yield dict(plat=plat, PY=py, NPY=111) 218 | 219 | 220 | def dir_size(dir_path): 221 | return sum(sum(getsize(join(root, fn)) for fn in files) 222 | for root, unused_dirs, files in os.walk(dir_path)) 223 | 224 | 225 | def check_dir_content(recipe_dir): 226 | disallowed_extensions = ( 227 | '.tar', '.tar.gz', '.tar.bz2', '.tar.xz', 228 | '.so', '.dylib', '.la', '.a', '.dll', '.pyd', 229 | ) 230 | for root, unused_dirs, files in os.walk(recipe_dir): 231 | for fn in files: 232 | fn_lower = fn.lower() 233 | if fn_lower.endswith(disallowed_extensions): 234 | if PEDANTIC: 235 | raise RecipeError("found: %s" % fn) 236 | else: 237 | print("Warning: found: %s" % fn) 238 | path = join(root, fn) 239 | # only allow small archives for testing 240 | if (PEDANTIC and fn_lower.endswith(('.bz2', '.gz')) and 241 | getsize(path) > 512): 242 | raise RecipeError("found: %s (too large)" % fn) 243 | 244 | if basename(recipe_dir) == 'icu': 245 | return 246 | 247 | # check total size od recipe directory (recursively) 248 | kb_size = dir_size(recipe_dir) / 1024 249 | kb_limit = 512 250 | if PEDANTIC and kb_size > kb_limit: 251 | raise RecipeError("recipe too large: %d KB (limit %d KB)" % 252 | (kb_size, kb_limit)) 253 | 254 | if PEDANTIC: 255 | try: 256 | with open(join(recipe_dir, 'build.sh'), 'rb') as fi: 257 | data = fi.read() 258 | if data and not data.decode('utf-8').startswith(('#!/bin/bash\n', 259 | '#!/bin/sh\n')): 260 | raise RecipeError("not a bash script: build.sh") 261 | except IOError: 262 | pass 263 | 264 | 265 | def render_jinja2(recipe_dir): 266 | import jinja2 267 | 268 | loaders = [jinja2.FileSystemLoader(recipe_dir)] 269 | env = jinja2.Environment(loader=jinja2.ChoiceLoader(loaders)) 270 | template = env.get_or_select_template('meta.yaml') 271 | return template.render(environment=env) 272 | 273 | 274 | def validate_recipe(recipe_dir, pedantic=True): 275 | global PEDANTIC 276 | PEDANTIC = bool(pedantic) 277 | 278 | meta_path = join(recipe_dir, 'meta.yaml') 279 | with open(meta_path, 'rb') as fi: 280 | data = fi.read() 281 | if PEDANTIC and not all_ascii(data): 282 | raise RecipeError("non-ASCII in: %s" % meta_path) 283 | if b'{{' in data: 284 | data = render_jinja2(recipe_dir) 285 | else: 286 | data = data.decode('utf-8') 287 | 288 | check_dir_content(recipe_dir) 289 | 290 | for cfg in iter_cfgs(): 291 | meta = parse(data, cfg) 292 | validate_meta(meta) 293 | validate_files(recipe_dir, meta) 294 | -------------------------------------------------------------------------------- /anaconda_verify/package.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import re 4 | import json 5 | import shlex 6 | import tarfile 7 | from os.path import basename 8 | 9 | from anaconda_verify.const import LICENSE_FAMILIES 10 | from anaconda_verify.utils import get_object_type, all_ascii, get_bad_seq 11 | from anaconda_verify.common import (check_name, check_version, check_specs, 12 | check_build_string, check_build_number, 13 | get_python_version_specs) 14 | 15 | 16 | PEDANTIC = True 17 | 18 | 19 | class PackageError(Exception): 20 | pass 21 | 22 | 23 | def dist_fn(fn): 24 | seq = get_bad_seq(fn) 25 | if seq: 26 | raise PackageError("'%s' not allowed in file name '%s'" % (seq, fn)) 27 | if fn.endswith('.tar.bz2'): 28 | return fn[:-8] 29 | if fn.endswith('.tar'): 30 | return fn[:-4] 31 | raise PackageError("did not expect filename: %s" % fn) 32 | 33 | 34 | class CondaPackageCheck(object): 35 | def __init__(self, path, verbose=False): 36 | self.verbose = verbose 37 | self.t = tarfile.open(path) 38 | self.dist = dist_fn(basename(path)) 39 | self.name, self.version, self.build = self.dist.rsplit('-', 2) 40 | paths = [m.path for m in self.t.getmembers()] 41 | self.paths = set(paths) 42 | if len(paths) != len(self.paths): 43 | raise PackageError("duplicate members") 44 | raw = self.t.extractfile('info/index.json').read() 45 | self.info = json.loads(raw.decode('utf-8')) 46 | self.win_pkg = bool(self.info['platform'] == 'win') 47 | if not all_ascii(raw, self.win_pkg): 48 | raise PackageError("non-ASCII in: info/index.json") 49 | 50 | def check_members(self): 51 | for m in self.t.getmembers(): 52 | path = m.path 53 | if not all_ascii(path.encode('utf-8')): 54 | raise PackageError("non-ASCII path: %r" % path) 55 | 56 | def info_files(self): 57 | raw = self.t.extractfile('info/files').read() 58 | if not all_ascii(raw, self.win_pkg): 59 | raise PackageError("non-ASCII in: info/files") 60 | lista = [p.strip() for p in raw.decode('utf-8').splitlines()] 61 | for p in lista: 62 | if p.startswith('info/'): 63 | raise PackageError("Did not expect '%s' in info/files" % p) 64 | 65 | seta = set(lista) 66 | if len(lista) != len(seta): 67 | raise PackageError('info/files: duplicates') 68 | 69 | listb = [m.path for m in self.t.getmembers() 70 | if not (m.path.startswith('info/') or m.isdir())] 71 | setb = set(listb) 72 | if len(listb) != len(setb): 73 | raise PackageError("info_files: duplicate members") 74 | 75 | if seta == setb: 76 | return 77 | for p in sorted(seta | setb): 78 | if p not in seta: 79 | print('%r not in info/files' % p) 80 | if p not in setb: 81 | print('%r not in tarball' % p) 82 | raise PackageError("info/files") 83 | 84 | 85 | def no_hardlinks(self): 86 | for m in self.t.getmembers(): 87 | if m.islnk(): 88 | raise PackageError('hardlink found: %s' % m.path) 89 | 90 | 91 | def not_allowed_files(self): 92 | not_allowed = {'conda-meta', 'conda-bld', 93 | 'pkgs', 'pkgs32', 'envs'} 94 | not_allowed_dirs = tuple(x + '/' for x in not_allowed) 95 | for p in self.paths: 96 | if (p.startswith(not_allowed_dirs) or 97 | p in not_allowed or 98 | p.endswith('/.DS_Store') or 99 | p.endswith('~')): 100 | raise PackageError("directory or filename not allowed: " 101 | "%s" % p) 102 | if PEDANTIC and p in ('info/package_metadata.json', 103 | 'info/link.json'): 104 | raise PackageError("file not allowed: %s" % p) 105 | 106 | def index_json(self): 107 | for varname in 'name', 'version', 'build': 108 | if self.info[varname] != getattr(self, varname): 109 | raise PackageError("info/index.json for %s: %r != %r" % 110 | (varname, self.info[varname], 111 | getattr(self, varname))) 112 | lst = [ 113 | check_name(self.info['name']), 114 | check_version(self.info['version']), 115 | check_build_number(self.info['build_number']), 116 | ] 117 | if PEDANTIC: 118 | lst.append(check_build_string(self.info['build'])) 119 | for res in lst: 120 | if res: 121 | raise PackageError("info/index.json: %s" % res) 122 | 123 | depends = self.info.get('depends') 124 | if depends is None: 125 | raise PackageError("info/index.json: key 'depends' missing") 126 | res = check_specs(depends) 127 | if res: 128 | raise PackageError("info/index.json: %s" % res) 129 | 130 | if PEDANTIC: 131 | lf = self.info.get('license_family', self.info.get('license')) 132 | if lf not in LICENSE_FAMILIES: 133 | raise PackageError("wrong license family: %s" % lf) 134 | 135 | def no_bat_and_exe(self): 136 | bats = {p[:-4] for p in self.paths if p.endswith('.bat')} 137 | exes = {p[:-4] for p in self.paths if p.endswith('.exe')} 138 | both = bats & exes 139 | if both: 140 | raise PackageError("Both .bat and .exe files: %s" % both) 141 | 142 | 143 | def _check_has_prefix_line(self, line): 144 | line = line.strip() 145 | try: 146 | placeholder, mode, f = [x.strip('"\'') for x in 147 | shlex.split(line, posix=False)] 148 | except ValueError: 149 | placeholder, mode, f = '//', 'text', line 150 | 151 | if f not in self.paths: 152 | raise PackageError("info/has_prefix: target '%s' not in " 153 | "package" % f) 154 | 155 | if mode == 'binary': 156 | if self.name == 'python': 157 | raise PackageError("binary placeholder not allowed in Python") 158 | if self.win_pkg: 159 | raise PackageError("binary placeholder not allowed on Windows") 160 | if PEDANTIC: 161 | print("WARNING: info/has_prefix: binary replace mode: %s" % f) 162 | return 163 | if len(placeholder) != 255: 164 | msg = ("info/has_prefix: binary placeholder not " 165 | "255 bytes, but: %d" % len(placeholder)) 166 | if PEDANTIC: 167 | raise PackageError(msg) 168 | else: 169 | print("Warning: %s" % msg) 170 | elif mode == 'text': 171 | pass 172 | else: 173 | raise PackageError("info/has_prefix: invalid mode") 174 | 175 | 176 | def has_prefix(self): 177 | for m in self.t.getmembers(): 178 | if m.path != 'info/has_prefix': 179 | continue 180 | if self.win_pkg: 181 | print("WARNING: %s" % m.path) 182 | data = self.t.extractfile(m.path).read() 183 | if not all_ascii(data, self.win_pkg): 184 | raise PackageError("non-ASCII in: info/has_prefix") 185 | for line in data.decode('utf-8').splitlines(): 186 | self._check_has_prefix_line(line) 187 | 188 | 189 | def warn_post_link(self): 190 | for p in self.paths: 191 | if p.endswith(( 192 | '-post-link.sh', '-pre-link.sh', '-pre-unlink.sh', 193 | '-post-link.bat', '-pre-link.bat', '-pre-unlink.bat', 194 | )): 195 | print("WARNING: %s" % p) 196 | 197 | def no_setuptools(self): 198 | for p in self.paths: 199 | if p.endswith('easy-install.pth'): 200 | raise PackageError("easy-install.pth file not allowed") 201 | 202 | if self.name in ('setuptools', 'distribute'): 203 | return 204 | for p in self.paths: 205 | if p.endswith(('MyPyPa-0.1.0-py2.5.egg', 206 | 'mytestegg-1.0.0-py3.4.egg')): 207 | continue 208 | if (p.endswith('.egg') or 209 | 'site-packages/pkg_resources' in p or 210 | 'site-packages/__pycache__/pkg_resources' in p or 211 | p.startswith('bin/easy_install') or 212 | p.startswith('Scripts/easy_install')): 213 | raise PackageError("file '%s' not allowed" % p) 214 | 215 | def no_easy_install_script(self): 216 | if not PEDANTIC: 217 | return 218 | for m in self.t.getmembers(): 219 | if not m.name.startswith(('bin/', 'Scripts/')): 220 | continue 221 | if not m.isfile(): 222 | continue 223 | data = self.t.extractfile(m.path).read(1024) 224 | if b'EASY-INSTALL-SCRIPT' in data: 225 | raise PackageError("easy install script found: %s" % m.name) 226 | 227 | def no_pth(self): 228 | for p in self.paths: 229 | if PEDANTIC and p.endswith('-nspkg.pth'): 230 | raise PackageError("found namespace .pth file '%s'" % p) 231 | if p.endswith('.pth'): 232 | print("WARNING: .pth file: %s" % p) 233 | 234 | def warn_pyo(self): 235 | if self.name == 'python': 236 | return 237 | for p in self.paths: 238 | if p.endswith('.pyo'): 239 | print("WARNING: .pyo file: %s" % p) 240 | 241 | def no_py_next_so(self): 242 | for p in self.paths: 243 | if p.endswith('.so'): 244 | root = p[:-3] 245 | elif p.endswith('.pyd'): 246 | root = p[:-4] 247 | else: 248 | continue 249 | for ext in '.py', '.pyc': 250 | if root + ext in self.paths: 251 | print("WARNING: %s next to: %s" % (ext, p)) 252 | 253 | def no_pyc_in_stdlib(self): 254 | if self.name in {'python', 'scons', 'conda-build', 'dbus'}: 255 | return 256 | for p in self.paths: 257 | if p.endswith('.pyc') and not 'site-packages' in p: 258 | raise PackageError(".pyc found in stdlib: %s" % p) 259 | 260 | def no_2to3_pickle(self): 261 | if self.name == 'python': 262 | return 263 | for p in self.paths: 264 | if ('lib2to3' in p and p.endswith('.pickle')): 265 | raise PackageError("found lib2to3 .pickle: %s" % p) 266 | 267 | def pyc_files(self): 268 | if 'py3' in self.build: 269 | return 270 | for p in self.paths: 271 | if ('/site-packages/' not in p) or ('/port_v3/' in p): 272 | continue 273 | if p.endswith('.py') and (p + 'c') not in self.paths: 274 | print("WARNING: pyc missing for:", p) 275 | if not self.verbose: 276 | return 277 | 278 | def menu_names(self): 279 | if not PEDANTIC: 280 | return 281 | menu_json_files = [] 282 | for p in self.paths: 283 | if p.startswith('Menu/') and p.endswith('.json'): 284 | menu_json_files.append(p) 285 | if len(menu_json_files) == 0: 286 | pass 287 | elif len(menu_json_files) == 1: 288 | fn = menu_json_files[0][5:] 289 | if fn != '%s.json' % self.name: 290 | raise PackageError("wrong Menu json file name: %s" % fn) 291 | else: 292 | raise PackageError("too many Menu json files: %r" % 293 | menu_json_files) 294 | 295 | def check_windows_arch(self): 296 | if self.name in ('python', 'conda-build', 'pip', 'xlwings', 297 | 'phantomjs', 'qt', 'graphviz', 'nsis', 'swig'): 298 | return 299 | if not self.win_pkg: 300 | return 301 | arch = self.info['arch'] 302 | if arch not in ('x86', 'x86_64'): 303 | raise PackageError("Unrecognized Windows architecture: %s" % 304 | arch) 305 | for m in self.t.getmembers(): 306 | if not m.name.lower().endswith(('.exe', '.dll')): 307 | continue 308 | data = self.t.extractfile(m.path).read(4096) 309 | tp = get_object_type(data) 310 | if ((arch == 'x86' and tp != 'DLL I386') or 311 | (arch == 'x86_64' and tp != 'DLL AMD64')): 312 | raise PackageError("File %s has object type %s, but info/" 313 | "index.json arch is %s" % 314 | (m.name, tp, arch)) 315 | 316 | def get_sp_location(self): 317 | py_ver = get_python_version_specs(self.info['depends']) 318 | if py_ver is None: 319 | return '' 320 | 321 | if self.win_pkg: 322 | return 'Lib/site-packages' 323 | else: 324 | return 'lib/python%s/site-packages' % py_ver 325 | 326 | def list_packages(self): 327 | sp_location = self.get_sp_location() 328 | pat = re.compile(r'site-packages/([^/]+)') 329 | res = set() 330 | for p in self.paths: 331 | m = pat.search(p) 332 | if m is None: 333 | continue 334 | if not p.startswith(sp_location): 335 | print("WARNING: found %s" % p) 336 | fn = m.group(1) 337 | if '-' in fn or fn.endswith('.pyc'): 338 | continue 339 | res.add(fn) 340 | if self.verbose: 341 | for x in res: 342 | print(' %s' % x) 343 | for pkg_name in 'numpy', 'scipy': 344 | if self.name != pkg_name and pkg_name in res: 345 | raise PackageError("found %s" % pkg_name) 346 | if self.name not in ('setuptools', 'distribute', 'python'): 347 | for x in ('pkg_resources.py', 'setuptools.pth', 'easy_install.py', 348 | 'setuptools'): 349 | if x in res: 350 | raise PackageError("found %s" % x) 351 | 352 | 353 | def validate_package(path, pedantic=True, verbose=True): 354 | global PEDANTIC 355 | PEDANTIC = bool(pedantic) 356 | 357 | x = CondaPackageCheck(path, verbose) 358 | x.check_members() 359 | x.info_files() 360 | x.no_hardlinks() 361 | x.not_allowed_files() 362 | x.index_json() 363 | x.no_bat_and_exe() 364 | x.has_prefix() 365 | x.warn_post_link() 366 | x.no_setuptools() 367 | x.no_easy_install_script() 368 | x.no_pth() 369 | x.warn_pyo() 370 | x.no_py_next_so() 371 | x.no_pyc_in_stdlib() 372 | x.no_2to3_pickle() 373 | x.pyc_files() 374 | x.menu_names() 375 | x.check_windows_arch() 376 | x.list_packages() 377 | x.t.close() 378 | --------------------------------------------------------------------------------