├── MANIFEST.in ├── .gitignore ├── .travis.yml ├── tox.ini ├── setup.py ├── README.md ├── redisbayes.py └── ez_setup.py /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | /build 3 | /dist 4 | *.egg-info 5 | .tox 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.6" 4 | - "2.7" 5 | services: 6 | - redis-server 7 | install: 8 | - pip install . 9 | script: 10 | - python redisbayes.py 11 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 
# -*- coding: utf-8 -*-
# this is not a love song
# http://packages.python.org/distribute/setuptools.html
# http://diveintopython3.org/packaging.html
# http://wiki.python.org/moin/CheeseShopTutorial
# http://pypi.python.org/pypi?:action=list_classifiers

from ez_setup import use_setuptools
use_setuptools(version="0.6c11")

import os
from setuptools import setup


def read(fname):
    """Return the text of `fname`, looked up next to this setup script.

    `fname` is joined onto the directory containing this file, so the
    result does not depend on the current working directory.  The file is
    read inside a ``with`` block so the handle is closed as soon as the
    contents have been read instead of lingering until garbage collection.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path) as handle:
        return handle.read()


setup(
    name = "redisbayes",
    # The version is taken from the module itself so it is defined in
    # exactly one place.
    version = __import__("redisbayes").__version__,
    description = u"Naïve Bayesian Text Classifier on Redis",
    long_description = read("README.md"),
    author = "Justine Tunney",
    author_email = "jtunney@gmail.com",
    url = "https://github.com/jart/redisbayes",
    license = "MIT",
    install_requires = ["redis"],
    py_modules = ["redisbayes", "ez_setup"],
    classifiers = [
        "Development Status :: 5 - Production/Stable",
        "License :: OSI Approved :: MIT License",
        "Intended Audience :: Developers",
        "Programming Language :: Python",
        "Topic :: Database",
        "Topic :: Communications :: Email",
    ],
)
I wrote this to filter spammy comments from a high traffic forum website and it worked pretty well. It can work for you too :) It's not tied to any particular format like email, it just deals with the raw text. 7 | 8 | This is probably the only spam filtering library you'll find for Python that's simple (170 lines of code), works (30 lines of test code), and doesn't suck. 9 | 10 | 11 | ## Installation 12 | 13 | From folder: 14 | 15 | sudo python setup.py install 16 | 17 | From cheeseshop: 18 | 19 | sudo pip install redisbayes 20 | 21 | From git: 22 | 23 | sudo pip install git+git://github.com/jart/redisbayes.git 24 | 25 | 26 | ## Basic Usage 27 | 28 | import redis, redisbayes 29 | rb = redisbayes.RedisBayes(redis=redis.Redis()) 30 | 31 | rb.train('good', 'sunshine God love sex lobster sloth') 32 | rb.train('bad', 'fear death horror government zombie') 33 | 34 | assert rb.classify('sloths are so cute i love them') == 'good' 35 | assert rb.classify('i am a zombie and love the government') == 'bad' 36 | 37 | print rb.score('i fear God and hate the government') 38 | 39 | rb.untrain('good', 'sunshine God love sex lobster sloth') 40 | rb.untrain('bad', 'fear death horror government zombie') 41 | -------------------------------------------------------------------------------- /redisbayes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # redisbayes - Naïve Bayesian Text Classifier on Redis 4 | # Copyright (c) 2012 Justine Alexandra Roberts Tunney 5 | # 6 | # Permission is hereby granted, free of charge, to any person 7 | # obtaining a copy of this software and associated documentation 8 | # files (the "Software"), to deal in the Software without 9 | # restriction, including without limitation the rights to use, copy, 10 | # modify, merge, publish, distribute, sublicense, and/or sell copies 11 | # of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the 
# following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
u"""

    redisbayes
    ~~~~~~~~~~

    Naïve Bayesian Text Classifier on Redis.

    I wrote this to filter spammy comments from a high traffic forum website
    and it worked pretty well.  It can work for you too :)

    For example::

        >>> import redis
        >>> import redisbayes
        >>> rb = redisbayes.RedisBayes(redis.Redis(), prefix='bayes:test:')
        >>> rb.flush()
        >>> rb.classify('nothing trained yet') is None
        True
        >>> rb.train('good', 'sunshine God love sex lobster sloth')
        >>> rb.train('bad', 'fear death horror government zombie')
        >>> rb.classify('sloths are so cute i love them')
        'good'
        >>> rb.classify('i am a zombie and love the government')
        'bad'
        >>> int(rb.score('i am a zombie and love the government')['bad'])
        -7
        >>> int(rb.score('i am a zombie and love the government')['good'])
        -9
        >>> rb.untrain('good', 'sunshine God love sex lobster sloth')
        >>> rb.untrain('bad', 'fear death horror government zombie')
        >>> rb.score('lolcat')
        {}

    Words are lowercased and unicode is supported::

        >>> print(english_tokenizer("Æther")[0])
        æther

    Common english words and 1-2 character words are ignored::

        >>> english_tokenizer("greetings mary a b aa bb") == ['mary']
        True

    Some characters are removed::

        >>> print(english_tokenizer("contraction's")[0])
        contraction's
        >>> print(english_tokenizer("what|is|goth")[0])
        goth

"""

import re
import math


__version__ = "0.1.3"

# Text/bytes type aliases so the module runs unchanged on Python 2 and 3.
try:
    _string_types = basestring
    _text_type = unicode
except NameError:
    _string_types = (str, bytes)
    _text_type = str

# Characters treated as word separators by tidy().  Compiled once with the
# UNICODE flag; passing the flag positionally to re.sub() would land in its
# `count` parameter and silently cap the number of replacements.
_SEPARATORS = re.compile(r'[\_.,<>:;~+|\[\]?`"!@#$%^&*()\s]', re.UNICODE)

# Stop words dropped by english_tokenizer().
english_ignore = set("""
a able about above abroad according accordingly across actually adj after
afterwards again against ago ahead ain't all allow allows almost alone along
alongside already also although always am amid amidst among amongst an and
another any anybody anyhow anyone anything anyway anyways anywhere apart
appear appreciate appropriate are aren't around as a's aside ask asking
associated at available away awfully b back backward backwards be became
because become becomes becoming been before beforehand begin behind being
believe below beside besides best better between beyond both brief but by c
came can cannot cant can't caption cause causes certain certainly changes
clearly c'mon co co. com come comes concerning consequently consider
considering contain containing contains corresponding could couldn't course
c's currently d dare daren't definitely described despite did didn't different
directly do does doesn't doing done don't down downwards during e each edu eg
eight eighty either else elsewhere end ending enough entirely especially et
etc even ever evermore every everybody everyone everything everywhere ex
exactly example except f fairly far farther few fewer fifth first five
followed following follows for forever former formerly forth forward found
four from further furthermore g get gets getting given gives go goes going
gone got gotten greetings h had hadn't half happens hardly has hasn't have
haven't having he he'd he'll hello help hence her here hereafter hereby herein
here's hereupon hers herself he's hi him himself his hither hopefully how
howbeit however hundred i i'd ie if ignored i'll i'm immediate in inasmuch inc
inc. indeed indicate indicated indicates inner inside insofar instead into
inward is isn't it it'd it'll its it's itself i've j just k keep keeps kept
know known knows l last lately later latter latterly least less lest let let's
like liked likely likewise little look looking looks low lower ltd m made
mainly make makes many may maybe mayn't me mean meantime meanwhile merely
might mightn't mine minus miss more moreover most mostly mr mrs much must
mustn't my myself n name namely nd near nearly necessary need needn't needs
neither never neverf neverless nevertheless new next nine ninety no nobody non
none nonetheless noone no-one nor normally not nothing notwithstanding novel
now nowhere o obviously of off often oh ok okay old on once one ones one's
only onto opposite or other others otherwise ought oughtn't our ours ourselves
out outside over overall own p particular particularly past per perhaps placed
please plus possible presumably probably provided provides q que quite qv r
rather rd re really reasonably recent recently regarding regardless regards
relatively respectively right round s said same saw say saying says second
secondly see seeing seem seemed seeming seems seen self selves sensible sent
serious seriously seven several shall shan't she she'd she'll she's should
shouldn't since six so some somebody someday somehow someone something
sometime sometimes somewhat somewhere soon sorry specified specify specifying
still sub such sup sure t take taken taking tell tends th than thank thanks
thanx that that'll thats that's that've the their theirs them themselves then
thence there thereafter thereby there'd therefore therein there'll there're
theres there's thereupon there've these they they'd they'll they're they've
thing things think third thirty this thorough thoroughly those though three
through throughout thru thus till to together too took toward towards tried
tries truly try trying t's twice two u un under underneath undoing
unfortunately unless unlike unlikely until unto up upon upwards us use used
useful uses using usually v value various versus very via viz vs w want wants
was wasn't way we we'd welcome well we'll went were we're weren't we've what
whatever what'll what's what've when whence whenever where whereafter whereas
whereby wherein where's whereupon wherever whether which whichever while
whilst whither who who'd whoever whole who'll whom whomever who's whose why
will willing wish with within without wonder won't would wouldn't x y yes yet
you you'd you'll your you're yours yourself yourselves you've z zero
successful greatest began including being all for close but
""".split())


def _native(value):
    """Return a redis reply as a native string.

    Clients may hand back ``bytes`` on Python 3; those are decoded as UTF-8
    so they can be concatenated with the (text) key prefix.  Anything that
    is already a ``str`` (including Python 2 byte strings) is returned
    untouched.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        return value.decode('utf8')
    return value


def tidy(text):
    """Normalise `text` to lowercase unicode with separators blanked out.

    Non-string input is converted with ``str()``; byte strings are decoded
    as UTF-8.  Every punctuation/whitespace character matched by
    ``_SEPARATORS`` is replaced by a single space (one space per matched
    character, so runs are not collapsed).
    """
    if not isinstance(text, _string_types):
        text = str(text)
    if not isinstance(text, _text_type):
        text = text.decode('utf8')
    return _SEPARATORS.sub(' ', text.lower())


def english_tokenizer(text):
    """Split `text` into words, dropping stop words and 1-2 letter words."""
    return [word for word in tidy(text).split()
            if len(word) > 2 and word not in english_ignore]


def occurances(words):
    """Return a dict mapping each word in `words` to how often it occurs."""
    counts = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1
    return counts


class RedisBayes(object):
    """Naive Bayesian text classifier that keeps its word counts in redis.

    Storage layout (all keys start with `prefix`):

    * ``<prefix>categories`` -- set of known category names
    * ``<prefix><category>`` -- hash of word -> occurrence count

    :param redis: client object exposing the redis commands used here
        (``sadd``, ``srem``, ``smembers``, ``hincrby``, ``hget``, ``hset``,
        ``hdel``, ``hvals``, ``delete``).  When ``None``, a default
        ``redis.Redis()`` connection is created.
    :param prefix: string prepended to every key.
    :param correction: pseudo-count used for words a category has never
        seen, so their probability is small but not zero.
    :param tokenizer: callable turning text into a list of words; defaults
        to :func:`english_tokenizer`.
    """

    def __init__(self, redis=None, prefix='bayes:', correction=0.1,
                 tokenizer=None):
        if redis is None:
            # Imported lazily so the module can be imported (e.g. by
            # setup.py to read __version__) without redis installed.
            from redis import Redis
            redis = Redis()
        self.redis = redis
        self.prefix = prefix
        self.correction = correction
        self.tokenizer = tokenizer or english_tokenizer

    def _categories(self):
        """Return the trained category names as native strings."""
        members = self.redis.smembers(self.prefix + 'categories')
        return [_native(member) for member in members]

    def flush(self):
        """Delete every category hash and the category set itself."""
        for category in self._categories():
            self.redis.delete(self.prefix + category)
        self.redis.delete(self.prefix + 'categories')

    def train(self, category, text):
        """Add the words of `text` to `category`'s counts."""
        self.redis.sadd(self.prefix + 'categories', category)
        key = self.prefix + category
        for word, count in occurances(self.tokenizer(text)).items():
            self.redis.hincrby(key, word, count)

    def untrain(self, category, text):
        """Subtract the words of `text` from `category`'s counts.

        Words whose count would drop to zero or below are removed; a
        category left with no words is removed entirely.
        """
        key = self.prefix + category
        for word, count in occurances(self.tokenizer(text)).items():
            current = self.redis.hget(key, word)
            if not current:
                continue
            remaining = int(current) - count
            if remaining > 0:
                self.redis.hset(key, word, remaining)
            else:
                self.redis.hdel(key, word)
        if self.tally(category) == 0:
            self.redis.delete(key)
            self.redis.srem(self.prefix + 'categories', category)

    def classify(self, text):
        """Return the best-scoring category for `text`, or ``None``.

        ``None`` is returned when nothing has been trained.
        """
        score = self.score(text)
        if not score:
            return None
        return sorted(score.items(), key=lambda item: item[1])[-1][0]

    def score(self, text):
        """Return ``{category: log-probability}`` for `text`.

        Each distinct word of `text` contributes ``log(count / tally)``
        once, where ``count`` is the word's stored count in that category
        (or `correction` when the word is unknown) and ``tally`` is the
        category's total word count.  Categories with no words are skipped.

        :raises AssertionError: if a stored count is not positive.
        """
        words = occurances(self.tokenizer(text))
        scores = {}
        for category in self._categories():
            tally = self.tally(category)
            if tally == 0:
                continue
            key = self.prefix + category
            total = 0.0
            for word in words:
                stored = self.redis.hget(key, word)
                if stored:
                    # Redis hands counts back as strings/bytes; convert
                    # before validating so the check is meaningful.
                    count = float(stored)
                    assert count > 0, "corrupt bayesian database"
                else:
                    count = self.correction
                total += math.log(count / tally)
            scores[category] = total
        return scores

    def tally(self, category):
        """Return the total number of word occurrences stored for `category`.

        :raises AssertionError: if the stored counts sum to a negative value.
        """
        tally = sum(int(x) for x in self.redis.hvals(self.prefix + category))
        assert tally >= 0, "corrupt bayesian database"
        return tally


if __name__ == '__main__':
    import doctest, sys
    sys.exit(doctest.testmod()[0])
#!python
"""Bootstrap setuptools installation

If you want to use setuptools in your package's setup.py, just include this
file in the same directory with it, and add this to the top of your setup.py::

    from ez_setup import use_setuptools
    use_setuptools()

If you want to require a specific version of setuptools, set a download
mirror, or use an alternate download directory, you can do so by supplying
the appropriate options to ``use_setuptools()``.

This file can also be run as a script to install or upgrade setuptools.
"""
# NOTE(review): this module uses Python 2-only syntax (``print >>``,
# ``except X, e``, ``urllib2``) and will not import on Python 3.
import sys
# Default setuptools release to require/download.
DEFAULT_VERSION = "0.6c11"
# Download directory URL; the path segment is the running interpreter's
# major.minor version taken from sys.version.
DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3]

# Known-good MD5 hex digests keyed by egg file name.  _validate_md5() checks
# downloads against this table and update_md5() rewrites it in place.
md5_data = {
    'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca',
    'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb',
    'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b',
    'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a',
    'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618',
    'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac',
    'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5',
    'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4',
    'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c',
    'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b',
    'setuptools-0.6c10-py2.3.egg': 'ce1e2ab5d3a0256456d9fc13800a7090',
    'setuptools-0.6c10-py2.4.egg': '57d6d9d6e9b80772c59a53a8433a5dd4',
    'setuptools-0.6c10-py2.5.egg': 'de46ac8b1c97c895572e5e8596aeb8c7',
    'setuptools-0.6c10-py2.6.egg': '58ea40aef06da02ce641495523a0b7f5',
    'setuptools-0.6c11-py2.3.egg': '2baeac6e13d414a9d28e7ba5b5a596de',
    'setuptools-0.6c11-py2.4.egg': 'bd639f9b0eac4c42497034dec2ec0c2b',
    'setuptools-0.6c11-py2.5.egg': '64c94f3bf7a72a13ec83e0b24f2749b2',
    'setuptools-0.6c11-py2.6.egg': 'bfa92100bd772d5a213eedd356d64086',
    'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27',
    'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277',
    'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa',
    'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e',
    'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e',
    'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f',
    'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2',
    'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc',
    'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167',
    'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64',
    'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d',
    'setuptools-0.6c6-py2.3.egg': '35686b78116a668847237b69d549ec20',
    'setuptools-0.6c6-py2.4.egg': '3c56af57be3225019260a644430065ab',
    'setuptools-0.6c6-py2.5.egg': 'b2f8a7520709a5b34f80946de5f02f53',
    'setuptools-0.6c7-py2.3.egg': '209fdf9adc3a615e5115b725658e13e2',
    'setuptools-0.6c7-py2.4.egg': '5a8f954807d46a0fb67cf1f26c55a82e',
    'setuptools-0.6c7-py2.5.egg': '45d2ad28f9750e7434111fde831e8372',
    'setuptools-0.6c8-py2.3.egg': '50759d29b349db8cfd807ba8303f1902',
    'setuptools-0.6c8-py2.4.egg': 'cba38d74f7d483c06e9daa6070cce6de',
    'setuptools-0.6c8-py2.5.egg': '1721747ee329dc150590a58b3e1ac95b',
    'setuptools-0.6c9-py2.3.egg': 'a83c4020414807b496e4cfbe08507c03',
    'setuptools-0.6c9-py2.4.egg': '260a2be2e5388d66bdaee06abec6342a',
    'setuptools-0.6c9-py2.5.egg': 'fe67c3e5a17b12c0e7c541b7ea43a8e6',
    'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a',
}

import sys, os
# hashlib is preferred; the standalone md5 module is the fallback when
# hashlib cannot be imported.
try: from hashlib import md5
except ImportError: from md5 import md5

def _validate_md5(egg_name, data):
    """Return `data` after checking it against the recorded MD5 digest.

    If `egg_name` has an entry in ``md5_data`` and the digest of `data`
    differs, a message is printed to stderr and the process exits with
    status 2.  Eggs without an entry are returned unchecked.
    """
    if egg_name in md5_data:
        digest = md5(data).hexdigest()
        if digest != md5_data[egg_name]:
            print >>sys.stderr, (
                "md5 validation of %s failed! (Possible download problem?)"
                % egg_name
            )
            sys.exit(2)
    return data

def use_setuptools(
    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
    download_delay=15
):
    """Automatically find/download setuptools and make it available on sys.path

    `version` should be a valid setuptools version number that is available
    as an egg for download under the `download_base` URL (which should end with
    a '/'). `to_dir` is the directory where setuptools will be downloaded, if
    it is not already available. If `download_delay` is specified, it should
    be the number of seconds that will be paused before initiating a download,
    should one be required. If an older version of setuptools is installed,
    this routine will print a message to ``sys.stderr`` and raise SystemExit in
    an attempt to abort the calling script.
    """
    # Recorded before any import below: an already-imported setuptools or
    # pkg_resources cannot be swapped out, which is what the VersionConflict
    # branch checks.
    was_imported = 'pkg_resources' in sys.modules or 'setuptools' in sys.modules
    def do_download():
        # Fetch the egg, put it first on sys.path and tell setuptools which
        # egg it was bootstrapped from.
        egg = download_setuptools(version, download_base, to_dir, download_delay)
        sys.path.insert(0, egg)
        import setuptools; setuptools.bootstrap_install_from = egg
    try:
        import pkg_resources
    except ImportError:
        # No setuptools at all: download it.
        return do_download()
    try:
        # A suitable version is already installed: nothing to do.
        pkg_resources.require("setuptools>="+version); return
    except pkg_resources.VersionConflict, e:
        if was_imported:
            print >>sys.stderr, (
            "The required version of setuptools (>=%s) is not available, and\n"
            "can't be installed while this script is running. Please install\n"
            " a more recent version first, using 'easy_install -U setuptools'."
            "\n\n(Currently using %r)"
            ) % (version, e.args[0])
            sys.exit(2)
    except pkg_resources.DistributionNotFound:
        pass

    # Drop the stale pkg_resources module so the downloaded egg's copy is
    # the one imported from here on.
    del pkg_resources, sys.modules['pkg_resources'] # reload ok
    return do_download()

def download_setuptools(
    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
    delay = 15
):
    """Download setuptools from a specified location and return its filename

    `version` should be a valid setuptools version number that is available
    as an egg for download under the `download_base` URL (which should end
    with a '/'). `to_dir` is the directory where the egg will be downloaded.
    `delay` is the number of seconds to pause before an actual download attempt.
    """
    # NOTE(review): shutil is imported but not used in this function.
    import urllib2, shutil
    egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3])
    url = download_base + egg_name
    saveto = os.path.join(to_dir, egg_name)
    src = dst = None
    if not os.path.exists(saveto): # Avoid repeated downloads
        try:
            from distutils import log
            if delay:
                log.warn("""
---------------------------------------------------------------------------
This script requires setuptools version %s to run (even to display
help). I will attempt to download it for you (from
%s), but
you may need to enable firewall access for this script first.
I will start the download in %d seconds.

(Note: if this machine does not have network access, please obtain the file

%s

and place it in this directory before rerunning this script.)
---------------------------------------------------------------------------""",
                    version, download_base, delay, url
                ); from time import sleep; sleep(delay)
            log.warn("Downloading %s", url)
            # NOTE(review): with the default DEFAULT_URL this is a plain-HTTP
            # fetch, checked only against the MD5 table above.
            src = urllib2.urlopen(url)
            # Read/write all in one block, so we don't create a corrupt file
            # if the download is interrupted.
            data = _validate_md5(egg_name, src.read())
            dst = open(saveto,"wb"); dst.write(data)
        finally:
            # Close whichever handles were actually opened.
            if src: src.close()
            if dst: dst.close()
    return os.path.realpath(saveto)


def main(argv, version=DEFAULT_VERSION):
    """Install or upgrade setuptools and EasyInstall"""
    try:
        import setuptools
    except ImportError:
        # setuptools is missing: download the egg, run easy_install on it,
        # and remove the downloaded file afterwards.
        egg = None
        try:
            egg = download_setuptools(version, delay=0)
            sys.path.insert(0,egg)
            from setuptools.command.easy_install import main
            return main(list(argv)+[egg]) # we're done here
        finally:
            if egg and os.path.exists(egg):
                os.unlink(egg)
    else:
        if setuptools.__version__ == '0.0.1':
            print >>sys.stderr, (
            "You have an obsolete version of setuptools installed. Please\n"
            "remove it from your system entirely before rerunning this script."
            )
            sys.exit(2)

    req = "setuptools>="+version
    import pkg_resources
    try:
        pkg_resources.require(req)
    except pkg_resources.VersionConflict:
        # Installed setuptools is too old: upgrade from a fresh download.
        try:
            from setuptools.command.easy_install import main
        except ImportError:
            from easy_install import main
        main(list(argv)+[download_setuptools(delay=0)])
        sys.exit(0) # try to force an exit
    else:
        if argv:
            # Requirement satisfied: pass the arguments to easy_install.
            from setuptools.command.easy_install import main
            main(argv)
        else:
            print "Setuptools version",version,"or greater has been installed."
            print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)'

def update_md5(filenames):
    """Update our built-in md5 registry"""

    import re

    # Record the digest of every given file under its base name.
    for name in filenames:
        base = os.path.basename(name)
        f = open(name,'rb')
        md5_data[base] = md5(f.read()).hexdigest()
        f.close()

    # Render the table as sorted source lines.
    data = [" %r: %r,\n" % it for it in md5_data.items()]
    data.sort()
    repl = "".join(data)

    # Locate this module's own source file and splice the new table body
    # into the ``md5_data = {...}`` literal.
    import inspect
    srcfile = inspect.getsourcefile(sys.modules[__name__])
    f = open(srcfile, 'rb'); src = f.read(); f.close()

    match = re.search("\nmd5_data = {\n([^}]+)}", src)
    if not match:
        print >>sys.stderr, "Internal error!"
        sys.exit(2)

    src = src[:match.start(1)] + repl + src[match.end(1):]
    f = open(srcfile,'w')
    f.write(src)
    f.close()


# Script entry point: ``--md5update FILE...`` refreshes the digest table,
# anything else installs or upgrades setuptools.
if __name__=='__main__':
    if len(sys.argv)>2 and sys.argv[1]=='--md5update':
        update_md5(sys.argv[2:])
    else:
        main(sys.argv[1:])