├── .gitignore ├── README.md ├── pysnowflake.py └── idhandler.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | ._* 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pysnowflake 2 | =========== 3 | 4 | pysnowflake is a Python implementation of Twitter's snowflake service - https://github.com/twitter/snowflake 5 | 6 | This is based on the pysnowflake Python Thrift service that Erans did - https://github.com/erans/pysnowflake 7 | 8 | Our use case wasn't Thrift oriented but HTTP based, so this is a service that runs under the 9 | Tornado web framework, which makes it possible to have only one dependency (Tornado). 10 | 11 | Due to various reasons this implementation does not reach the performance indicated in the original 12 | Snowflake implementation, however it is good enough in most cases and can be combined with the 13 | help of a software load balancer such as HAProxy to run multiple processes to get higher performance. 14 | 15 | Installation 16 | ------------ 17 | 18 | * Install Tornado 19 | * Run the service 20 | 21 | Usage 22 | ----- 23 | usage: pysnowflake.py [--debug] [--port=9000] [--datacenter=DATACENTER_ID] [--worker=WORKER_ID] 24 | 25 | Python based Snowflake server over HTTP 26 | 27 | 28 | See the original snowflake server docs for a detailed description, but the bottom line is 29 | that giving each process unique numbers will ensure that unique identifiers are generated. 
30 | WORKER_ID is the identifier for this process 31 | DATACENTER_ID is the identifier for this datacenter 32 | 33 | API (aka URLs): 34 | ---- 35 | 36 | /id/USERAGENT -- get a unique ID; USERAGENT is provided for metrics purposes 37 | /timestamp/ -- get the current timestamp for this host 38 | /datacenter/ -- get the data center identifier for this process 39 | /worker/ -- get the worker identifier for this process 40 | 41 | 42 | Issues 43 | ------ 44 | 45 | Please report any issues via [github issues](https://github.com/koblas/pysnowflake/issues) 46 | -------------------------------------------------------------------------------- /pysnowflake.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # tornado 4 | import tornado.httpserver 5 | import tornado.ioloop 6 | import tornado.web 7 | import idhandler 8 | from tornado.options import define, options, parse_command_line 9 | 10 | ################################################################################ 11 | 12 | define("debug", default=False, help="run in debug mode", type=bool) 13 | define("port", default=9000, help="run on the given port", type=int) 14 | define("prefork", default=False, help="pre-fork across all CPUs", type=bool) 15 | define("datacenter", default=0, help="Datacenter Identifier", type=int) 16 | define("worker", default=0, help="Worker Identifier", type=int) 17 | 18 | class Application(tornado.web.Application): 19 | def __init__(self, xsrf_cookies=True): 20 | handlers = [ 21 | (r'/id/(.*)', idhandler.IdHandler), 22 | (r'/timestamp/', idhandler.TimestampHandler), 23 | (r'/worker/', idhandler.WorkerHandler), 24 | (r'/datacenter/', idhandler.DatacenterHandler), 25 | ] 26 | 27 | app_settings = { 28 | 'debug': options.debug, 29 | } 30 | 31 | self.idworker = idhandler.IdWorker(data_center_id=options.datacenter, worker_id=options.worker) 32 | 33 | super(Application, self).__init__(handlers, **app_settings) 34 | 35 | def main(): # pragma: no cover 
36 | parse_command_line() 37 | http_server = tornado.httpserver.HTTPServer(Application()) 38 | 39 | print "Starting tornado on port", options.port 40 | if options.prefork: 41 | print "\tpre-forking" 42 | http_server.bind(options.port) 43 | http_server.start() 44 | else: 45 | http_server.listen(options.port) 46 | 47 | try: 48 | tornado.ioloop.IOLoop.instance().start() 49 | except KeyboardInterrupt: 50 | pass 51 | 52 | if __name__ == "__main__": 53 | main() 54 | -------------------------------------------------------------------------------- /idhandler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright (c) 2011 Eran Sandler (eran@sandler.co.il), http://eran.sandler.co.il, http://forecastcloudy.net 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining 6 | # a copy of this software and associated documentation files (the 7 | # "Software"), to deal in the Software without restriction, including 8 | # without limitation the rights to use, copy, modify, merge, publish, 9 | # distribute, sublicense, and/or sell copies of the Software, and to 10 | # permit persons to whom the Software is furnished to do so, subject to 11 | # the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be 14 | # included in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#

import time
import logging
import re
import tornado.web


class InputError(Exception):
    """Raised when a worker id or data center id is out of range."""


class InvalidSystemClock(Exception):
    """Raised when the clock reads earlier than the last issued timestamp."""


class InvalidUserAgentError(Exception):
    """Raised when the supplied user agent does not pass validation."""


class IdWorker(object):
    """Generate snowflake-style unique integer ids.

    An id is the bitwise OR of four fields, from the most significant bits
    down: milliseconds elapsed since ``twepoch``, the data center id
    (5 bits), the worker id (5 bits) and a per-millisecond sequence
    counter (12 bits).
    """

    def __init__(self, worker_id=0, data_center_id=0):
        """Set up the bit layout and validate the two identifiers.

        Args:
            worker_id: identifier of this process, 0..max_worker_id.
            data_center_id: identifier of this data center,
                0..max_data_center_id.

        Raises:
            InputError: if either identifier is negative or larger than
                its field allows.
        """
        self.worker_id = worker_id
        self.data_center_id = data_center_id

        # Raw string so "\-" is a regex escape rather than a string escape.
        # "\Z" (not "$") so a trailing newline in the URL-decoded path is
        # rejected instead of being accepted and written to the log.
        self.user_agent_parser = re.compile(r"^[a-zA-Z][a-zA-Z\-0-9]*\Z")
        self.logger = logging.getLogger("idworker")

        # stats
        self.ids_generated = 0

        # Tue, 21 Mar 2006 20:50:14.000 GMT
        self.twepoch = 1142974214000

        # Plain int literals: Python 2 promotes to long automatically, and
        # the "L" suffix is a syntax error on Python 3.
        self.sequence = 0
        self.worker_id_bits = 5
        self.data_center_id_bits = 5
        # -1 ^ (-1 << n) yields a value with the low n bits set, i.e. the
        # largest number that fits in an n-bit field.
        self.max_worker_id = -1 ^ (-1 << self.worker_id_bits)
        self.max_data_center_id = -1 ^ (-1 << self.data_center_id_bits)
        self.sequence_bits = 12

        self.worker_id_shift = self.sequence_bits
        self.data_center_id_shift = self.sequence_bits + self.worker_id_bits
        self.timestamp_left_shift = (
            self.sequence_bits + self.worker_id_bits + self.data_center_id_bits
        )
        self.sequence_mask = -1 ^ (-1 << self.sequence_bits)

        self.last_timestamp = -1

        # Sanity check for worker_id
        if self.worker_id > self.max_worker_id or self.worker_id < 0:
            raise InputError(
                "worker_id",
                "worker id can't be greater than %i or less than 0"
                % self.max_worker_id,
            )

        if (self.data_center_id > self.max_data_center_id
                or self.data_center_id < 0):
            raise InputError(
                "data_center_id",
                "data center id can't be greater than %i or less than 0"
                % self.max_data_center_id,
            )

        self.logger.info(
            "worker starting. timestamp left shift %d, data center id bits %d, "
            "worker id bits %d, sequence bits %d, worker id %d",
            self.timestamp_left_shift,
            self.data_center_id_bits,
            self.worker_id_bits,
            self.sequence_bits,
            self.worker_id,
        )

    def _time_gen(self):
        """Return the current wall-clock time in whole milliseconds."""
        return int(time.time() * 1000)

    def _till_next_millis(self, last_timestamp):
        """Busy-wait until the clock reads later than ``last_timestamp``.

        Returns:
            The first millisecond timestamp strictly greater than
            ``last_timestamp``.
        """
        timestamp = self._time_gen()
        # Keep polling while we are still inside (or before) the exhausted
        # millisecond.  The previous condition was inverted and spun for as
        # long as the clock behaved normally.
        while timestamp <= last_timestamp:
            timestamp = self._time_gen()

        return timestamp

    def _next_id(self):
        """Build and return the next id.

        Raises:
            InvalidSystemClock: if the clock reads earlier than the
                timestamp used for the previous id.
        """
        timestamp = self._time_gen()

        if self.last_timestamp > timestamp:
            self.logger.warning(
                "clock is moving backwards. Rejecting request until %i",
                self.last_timestamp,
            )
            # Report how far behind the clock is, not the absolute timestamp.
            raise InvalidSystemClock(
                "Clock moved backwards. Refusing to generate id for %i "
                "milliseconds" % (self.last_timestamp - timestamp)
            )

        if self.last_timestamp == timestamp:
            self.sequence = (self.sequence + 1) & self.sequence_mask
            if self.sequence == 0:
                # Sequence wrapped: every value for this millisecond has
                # been handed out, so wait for the next one.
                timestamp = self._till_next_millis(self.last_timestamp)
        else:
            self.sequence = 0

        self.last_timestamp = timestamp

        new_id = (
            ((timestamp - self.twepoch) << self.timestamp_left_shift)
            | (self.data_center_id << self.data_center_id_shift)
            | (self.worker_id << self.worker_id_shift)
            | self.sequence
        )
        self.ids_generated += 1
        return new_id

    def _valid_user_agent(self, user_agent):
        """Return True if ``user_agent`` matches the allowed pattern."""
        return self.user_agent_parser.search(user_agent) is not None

    def get_worker_id(self):
        """Return the worker id this instance was created with."""
        return self.worker_id

    def get_timestamp(self):
        """Return the current time in milliseconds."""
        return self._time_gen()

    def get_id(self, useragent):
        """Validate ``useragent`` and return a newly generated id.

        Raises:
            InvalidUserAgentError: if ``useragent`` fails validation.
            InvalidSystemClock: if the system clock moved backwards.
        """
        if not self._valid_user_agent(useragent):
            self.logger.error("Invalid useragent: %s", useragent)
            raise InvalidUserAgentError()

        new_id = self._next_id()
        self.logger.debug(
            "id: %i user_agent: %s worker_id: %i data_center_id: %i",
            new_id, useragent, self.worker_id, self.data_center_id,
        )
        return new_id

    def get_datacenter_id(self):
        """Return the data center id this instance was created with."""
        return self.data_center_id


# Handlers

class IdHandler(tornado.web.RequestHandler):
    """Serve a new id; the captured URL path segment is the user agent."""

    def get(self, useragent):
        try:
            new_id = self.application.idworker.get_id(useragent)
        except InvalidUserAgentError:
            # A rejected user agent is a client error.
            self.send_error(400)
        else:
            self.finish("%i" % new_id)


class TimestampHandler(tornado.web.RequestHandler):
    """Serve the id worker's current millisecond timestamp."""

    def get(self):
        self.finish("%i" % self.application.idworker.get_timestamp())


class WorkerHandler(tornado.web.RequestHandler):
    """Serve the configured worker id."""

    def get(self):
        self.finish("%i" % self.application.idworker.get_worker_id())


class DatacenterHandler(tornado.web.RequestHandler):
    """Serve the configured data center id."""

    def get(self):
        self.finish("%i" % self.application.idworker.get_datacenter_id())