├── .gitignore
├── MIT.LICENSE
├── README.md
├── autocomplete
    ├── __init__.py
    ├── index.py
    └── utils.py
├── setup.py
└── test
    ├── input.json
    └── test.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[co]
 2 | 
 3 | # Packages
 4 | *.egg
 5 | *.egg-info
 6 | dist
 7 | build
 8 | eggs
 9 | parts
10 | bin
11 | var
12 | sdist
13 | develop-eggs
14 | .installed.cfg
15 | 
16 | # Installer logs
17 | pip-log.txt
18 | 
19 | # Unit test / coverage reports
20 | .coverage
21 | .tox
22 | 
23 | #Translations
24 | *.mo
25 | 
26 | #Mr Developer
27 | .mr.developer.cfg
28 | 


--------------------------------------------------------------------------------
/MIT.LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2012 Feng LI
 2 | <okidogii@gmail.com>
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining
 5 | a copy of this software and associated documentation files (the
 6 | "Software"), to deal in the Software without restriction, including
 7 | without limitation the rights to use, copy, modify, merge, publish,
 8 | distribute, sublicense, and/or sell copies of the Software, and to
 9 | permit persons to whom the Software is furnished to do so, subject to
10 | the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | autocomplete-redis
 2 | ============
 3 | 
 4 | autocomplete-redis is a Quora-like autocompletion library built on Redis.
 5 | 
 6 | Installation
 7 | ---------
 8 | 
 9 | * install pip (if you haven't yet) `easy_install pip`
10 | 
11 | * install pymmseg (support for Chinese Characters)： `pip install pymmseg`
12 | 
13 | * install autocomplete-redis： `pip install -e git+https://github.com/fengli/autocomplete-redis.git#egg=autocomplete-dev` 
14 | 
15 | Quick start
16 | ----------
17 | * Assume you have a few items to index.
18 | 
19 | ```python
20 | items=[{"uid":'1', "score":9, "term": u"hello world, that's great"},
21 |        {"uid":'2', "score":10, "term": u"what the hell or yell"},
22 |        {"uid":'3', "score":8.5, "term":u"World is like a box of chocolate"},
23 |       ]
24 | ```
25 | 
26 | The code to build the index and search it is simple:
27 | 
28 | ```python
29 | from autocomplete import Autocomplete
30 | 
31 | #build index
32 | au = Autocomplete ("scope")
33 | for item in items:
34 |   au.add_item (item)
35 | #search
36 | results = au.search_query (u'hel')
37 | 
38 | print results
39 | [{'term': 'what the hell or yell', 'score': 10, 'uid': '2'}, {'term': "hello world, that's great", 'score': 9, 'uid': '1'}]
40 | ```
41 | 
42 | 
43 | API
44 | ---------------
45 | 
46 | * Convention: each item you pass to `autocomplete` must have at least `"uid"` and `"term"`. `"score"` is optional, but it matters if you want results returned in ranked order. You may add any other fields you like.
47 | 
48 | ```python
49 | {"uid":'1', "score":9, "term": u"hello world, that's great", 'meta':"1992"}
50 | ```
51 |   * `uid`: the unique identifier for your item
52 |   * `score`: results are returned sorted by this value, highest first (defaults to 0 when omitted).
53 |   * `term`: the string to be indexed.
54 | 
55 | * `def __init__ (self, scope, redisaddr="localhost", limits=5, cached=True)`
56 | 
57 |   * scope: Scope allows you to index multiple independent indexes. 
58 |   * redisaddr: your redis address
59 |   * limits: How many results you want to get.
60 |   * cached: Whether to cache the combined result of a multi-key query (cached results expire after 10 minutes).
61 | 
62 | * `def del_index (self)`
63 | 
64 | Delete all the indexes. Warning: all data will be deleted.
65 | 
66 | * `def add_item (self,item)`
67 | 
68 | Add item to index.
69 | 
70 | * `def del_item (self,item)`
71 | 
72 | Delete item from index.
73 | 
74 | * `def update_item (self, item)`
75 | 
76 | Replace the item indexed under `item['uid']` with the new version.
77 | 
78 | * `def search_query (self,prefix)`
79 | 
80 | Search the index for all items whose `item['term']` contains words starting with each word of `prefix`.
81 | 
82 | Brought to you by:
83 | ----------------
84 | 
85 | * http://readpi.com
86 | 


--------------------------------------------------------------------------------
/autocomplete/__init__.py:
--------------------------------------------------------------------------------
1 | #-*- coding:utf-8 -*-
2 | 
3 | from index import *
4 | 
5 | 


--------------------------------------------------------------------------------
/autocomplete/index.py:
--------------------------------------------------------------------------------
  1 | #-*- coding:utf-8 -*-
  2 | import redis
  3 | 
  4 | try:
  5 |   import simplejson
  6 | except:
  7 |   from django.utils import simplejson
  8 | 
  9 | try:
 10 |   from django.core import serializers
 11 |   from django.db.models.loading import get_model
 12 | except:
 13 |   pass
 14 | 
 15 | import mmseg
 16 | from autocomplete.utils import queryset_iterator
 17 | 
 18 | class Autocomplete (object):
 19 |   """
 20 |   autocomplete.
 21 |   """
 22 | 
 23 |   def __init__ (self, scope, redisaddr="localhost", limits=5, cached=True):
 24 |     self.r = redis.Redis (redisaddr)
 25 |     self.scope = scope
 26 |     self.cached=cached
 27 |     self.limits = limits
 28 |     self.database = "database:%s" % scope
 29 |     self.indexbase = "indexbase:%s" % scope
 30 |     mmseg.Dictionary.load_dictionaries ()
 31 | 
 32 |   def _get_index_key (self, key):
 33 |     return "%s:%s" % (self.indexbase, key)
 34 | 
 35 |   def del_index (self):
 36 |     prefixs = self.r.smembers (self.indexbase)
 37 |     for prefix in prefixs:
 38 |       self.r.delete(self._get_index_key(prefix))
 39 |     self.r.delete(self.indexbase)
 40 |     self.r.delete(self.database)
 41 | 
 42 |   def sanity_check (self, item):
 43 |     """
 44 |     Make sure item has key that's needed.
 45 |     """
 46 |     for key in ("uid","term"):
 47 |       if not item.has_key (key):
 48 |         raise Exception ("Item should have key %s"%key )
 49 | 
 50 |   def add_item (self,item):
 51 |     """
 52 |     Create index for ITEM.
 53 |     """
 54 |     self.sanity_check (item)
 55 |     self.r.hset (self.database, item.get('uid'), simplejson.dumps(item))
 56 |     for prefix in self.prefixs_for_term (item['term']):
 57 |       self.r.sadd (self.indexbase, prefix)
 58 |       self.r.zadd (self._get_index_key(prefix),item.get('uid'), item.get('score',0))
 59 | 
 60 |   def del_item (self,item):
 61 |     """
 62 |     Delete ITEM from the index
 63 |     """
 64 |     for prefix in self.prefixs_for_term (item['term']):
 65 |       self.r.zrem (self._get_index_key(prefix), item.get('uid'))
 66 |       if not self.r.zcard (self._get_index_key(prefix)):
 67 |         self.r.delete (self._get_index_key(prefix))
 68 |         self.r.srem (self.indexbase, prefix)
 69 | 
 70 |   def update_item (self, item):
 71 |     self.del_item (item)
 72 |     self.add_item (item)
 73 | 
 74 |   def prefixs_for_term (self,term):
 75 |     """
 76 |     Get prefixs for TERM.
 77 |     """
 78 |     # Normalization
 79 |     term=term.lower()
 80 | 
 81 |     # Prefixs for term
 82 |     prefixs=[]
 83 |     tokens=mmseg.Algorithm(term)
 84 |     for token in tokens:
 85 |       word = token.text
 86 |       for i in xrange (1,len(word)+1):
 87 |         prefixs.append(word[:i])
 88 | 
 89 |     return prefixs
 90 | 
 91 |   def normalize (self,prefix):
 92 |     """
 93 |     Normalize the search string.
 94 |     """
 95 |     tokens = mmseg.Algorithm(prefix.lower())
 96 |     return [token.text for token in tokens]
 97 | 
 98 |   def search_query (self,prefix):
 99 |     search_strings = self.normalize (prefix)
100 | 
101 |     if not search_strings: return []
102 | 
103 |     cache_key = self._get_index_key (('|').join(search_strings))
104 | 
105 |     if not self.cached or not self.r.exists (cache_key):
106 |       self.r.zinterstore (cache_key, map (lambda x: self._get_index_key(x), search_strings))
107 |       self.r.expire (cache_key, 10 * 60)
108 | 
109 |     ids=self.r.zrevrange (cache_key, 0, self.limits)
110 |     if not ids: return ids
111 |     return map(lambda x:simplejson.loads(x),
112 |                self.r.hmget(self.database, *ids))
113 | 


--------------------------------------------------------------------------------
/autocomplete/utils.py:
--------------------------------------------------------------------------------
 1 | import gc
 2 | 
 3 | def queryset_iterator(queryset, chunksize=1000):
 4 |   '''''
 5 |   Iterate over a Django Queryset ordered by the primary key
 6 | 
 7 |   This method loads a maximum of chunksize (default: 1000) rows in it's
 8 |   memory at the same time while django normally would load all rows in it's
 9 |   memory. Using the iterator() method only causes it to not preload all the
10 |   classes.
11 | 
12 |   Note that the implementation of the iterator does not support ordered query sets.
13 |   '''
14 |   pk = 0
15 |   last_pk = queryset.order_by('-pk')[0].pk
16 |   queryset = queryset.order_by('pk')
17 |   while pk < last_pk:
18 |     for row in queryset.filter(pk__gt=pk)[:chunksize]:
19 |       pk = row.pk
20 |       yield row
21 |     gc.collect()
22 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup
 2 | long_description = open('README.md').read()
 3 | VERSION = '1.0'
 4 | 
 5 | setup(
 6 |     name='autocomplete',
 7 |     version=VERSION,
 8 |     packages=['autocomplete', 
 9 |               ],
10 |     description='Redis based autocompletion (build index and search query).',
11 |     long_description=long_description,
12 |     author='Feng Li',
13 |     author_email='okidogii@gmail.com',
14 |     license='MIT License',
15 |     url='https://github.com/fengli/autocomplete-redis.git',
16 |     platforms=["any"],
17 |     classifiers=[
18 |         'Development Status :: 1 - Beta',
19 |         'Intended Audience :: Developers',
20 |         'License :: OSI Approved :: MIT License',
21 |         'Natural Language :: English',
22 |         'Operating System :: OS Independent',
23 |         'Programming Language :: Python',
24 |     ],
25 | )
26 | 


--------------------------------------------------------------------------------
/test/input.json:
--------------------------------------------------------------------------------
1 | {"score": "9", "id": "1", "term": "轻轻地你走了"}
2 | {"score": "8", "id": "2", "term": "正如你轻轻地来"}
3 | {"score": "8.5", "id": "3", "term": "你挥一挥衣袖，不带走一片云彩"}
4 | 


--------------------------------------------------------------------------------
/test/test.py:
--------------------------------------------------------------------------------
 1 | #-*-coding:utf-8-*-
 2 | from autocomplete import Autocomplete
 3 | import os
 4 | import unittest
 5 | 
 6 | class testAutocomplete (unittest.TestCase):
 7 |   def setUp (self):
 8 |     self.items=[{"uid":'1', "score":9, "term": u"轻轻地你走了"},
 9 |                 {"uid":'2', "score":10, "term": u"正如你轻轻地来"},
10 |                 {"uid":'3', "score":8.5, "term":u"你挥一挥衣袖，不带走一片云彩"},
11 |                 ]
12 | 
13 |     self.a=Autocomplete("scope")
14 |     self.a.del_index()
15 |     for item in self.items:
16 |       self.a.add_item (item)
17 | 
18 |   def test_search_query2 (self):
19 |     results=self.a.search_query (u'轻轻')
20 |     self.assertEqual(len(results),2)
21 |     self.assertEqual(results[0]['uid'],'2')
22 |     self.assertEqual(results[1]['uid'],'1')
23 | 
24 |   def test_search_query3 (self):
25 |     results=self.a.search_query (u'你 带走')
26 |     self.assertEqual(len(results),1)
27 |     self.assertEqual(results[0]['uid'],'3')
28 | 
29 |   def test_search_query4 (self):
30 |     results=self.a.search_query (u'你挥一挥衣袖，不带走一片云彩')
31 |     self.assertEqual(len(results),1)
32 |     self.assertEqual(results[0]['uid'],'3')
33 | 
34 |   def test_update_item (self):
35 |     item = {"uid":'1', "score":13, "term": u"轻轻地你走了"}
36 |     self.a.update_item (item)
37 |     results=self.a.search_query (u'轻轻')
38 |     self.assertEqual(len(results),2)
39 |     self.assertEqual(results[0]['uid'],'1')
40 |     self.assertEqual(results[1]['uid'],'2')
41 | 
42 |   def test_del_item (self):
43 |     item = {"uid":'1', "score":9, "term": u"轻轻地你走了"}
44 |     self.a.del_item (item)
45 |     results=self.a.search_query (u'轻轻')
46 |     self.assertEqual(len(results),1)
47 |     self.assertEqual(results[0]['uid'],'2')
48 | 
49 | if __name__=='__main__':
50 |   unittest.main ()
51 | 


--------------------------------------------------------------------------------