├── .gitignore
├── .gitmodules
├── app.yaml
├── index.yaml
├── main.py
├── search
    └── __init__.py
├── static
    ├── favicon.ico
    └── robots.txt
└── tests
    ├── HOW-TO-TEST
    ├── __init__.py
    ├── roget.txt
    ├── test_app.py
    └── test_search.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .DS_Store
3 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "search/pyporter2"]
2 | 	path = search/pyporter2
3 | 	url = git://github.com/mdirolf/pyporter2.git
4 | 


--------------------------------------------------------------------------------
/app.yaml:
--------------------------------------------------------------------------------
 1 | application: billkatz-test
 2 | version: 7-16-2009-1
 3 | runtime: python
 4 | api_version: 1
 5 | 
 6 | handlers:
 7 | - url: .*/favicon\.ico
 8 |   static_files: static/favicon.ico
 9 |   upload: static/favicon.ico
10 | 
11 | - url: /robots\.txt
12 |   static_files: static/robots.txt
13 |   upload: static/robots.txt
14 | 
15 | - url: /static
16 |   static_dir: static
17 | 
18 | - url: .*
19 |   script: main.py
20 | 
21 | skip_files: |
22 |  ^(.*/)?(
23 |  (app\.yaml)|
24 |  (app\.yml)|
25 |  (index\.yaml)|
26 |  (index\.yml)|
27 |  (#.*#)|
28 |  (.*~)|
29 |  (.*\.py[co])|
30 |  (.*/RCS/.*)|
31 |  (\..*)|
32 |  (tests/.*)
33 |  )$
34 | 


--------------------------------------------------------------------------------
/index.yaml:
--------------------------------------------------------------------------------
 1 | indexes:
 2 | 
 3 | - kind: StemmedIndex
 4 |   ancestor: yes
 5 | 
 6 | - kind: StemmedIndex
 7 |   properties:
 8 |   - name: parent_kind
 9 |   - name: phrases
10 | 
11 | - kind: LiteralIndex
12 |   ancestor: yes
13 | 
14 | - kind: LiteralIndex
15 |   properties:
16 |   - name: parent_kind
17 |   - name: phrases
18 | 
19 | # AUTOGENERATED
20 | 
21 | # This index.yaml is automatically updated whenever the dev_appserver
22 | # detects that a new type of query is run.  If you want to manage the
23 | # index.yaml file manually, remove the above marker line (the line
24 | # saying "# AUTOGENERATED").  If you want to manage some indexes
25 | # manually, move them above the marker line.  The index.yaml file is
26 | # automatically uploaded to the admin console when you next deploy
27 | # your application using appcfg.py.
28 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # The MIT License
  4 | # 
  5 | # Copyright (c) 2009 William T. Katz
  6 | # Website/Contact: http://www.billkatz.com
  7 | # 
  8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | # of this software and associated documentation files (the "Software"), to 
 10 | # deal in the Software without restriction, including without limitation 
 11 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 12 | # and/or sell copies of the Software, and to permit persons to whom the 
 13 | # Software is furnished to do so, subject to the following conditions:
 14 | # 
 15 | # The above copyright notice and this permission notice shall be included in
 16 | # all copies or substantial portions of the Software.
 17 | # 
 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 23 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 24 | # DEALINGS IN THE SOFTWARE.
 25 | 
 26 | """A super simple Google App Engine text posting app.
 27 | 
 28 | Logged in visitors can add some text and search for keywords across all 
 29 | added pages.  It demos a simple full text search module.
 30 | """
 31 | __author__ = 'William T. Katz'
 32 | 
 33 | import cgi
 34 | import logging
 35 | 
 36 | from google.appengine.api import users
 37 | from google.appengine.ext import db
 38 | from google.appengine.ext import webapp
 39 | from google.appengine.ext.webapp.util import run_wsgi_app
 40 | 
 41 | # The following are necessary for full-text search demo
 42 | import search
 43 | INDEXING_URL = '/tasks/searchindexing'
 44 | 
 45 | class Page(search.Searchable, db.Model):
 46 |     user = db.UserProperty()
 47 |     title = db.StringProperty()
 48 |     content = db.TextProperty()
 49 |     created = db.DateTimeProperty(auto_now=True)
 50 |     INDEX_TITLE_FROM_PROP = 'title'
 51 |     # INDEX_USES_MULTI_ENTITIES = False
 52 | 
 53 | class SimplePage(webapp.RequestHandler):
 54 |     def render(self, html):
 55 |         user = users.get_current_user()
 56 |         page = '<html><body><div style="display:inline"><a href="/">Add Page</a> | '
 57 |         if user:
 58 |             page += 'Logged in as %s ' % (user.nickname())
 59 |             logout_url = users.create_logout_url(self.request.uri)
 60 |             page += '| <a href="%s">Logout</a>' % (logout_url)
 61 |         else:
 62 |             login_url = users.create_login_url(self.request.uri)
 63 |             page += '<a href="%s">Google login</a>' % (login_url)
 64 |         page += """</div>
 65 |         <hr>
 66 |         <h3>Full Text Search Test</h3>
 67 |         <p>This app tests a full text search module for Google App Engine.
 68 |         Once you are logged in, you can add text pages that will be indexed via
 69 |         Task Queue API tasks.  The search indices are efficiently stored using
 70 |         "Relation Index" entities as described in 
 71 |         <a href="http://code.google.com/events/io/sessions/BuildingScalableComplexApps.html">
 72 |         this Google I/O talk.</a></p>
 73 |         <p>My blog has an
 74 |         <a href="http://www.billkatz.com/2009/6/Simple-Full-Text-Search-for-App-Engine">
 75 |         article on this appengine-search module</a>.  You can download the code from the
 76 |         <a href="http://github.com/DocSavage/appengine-search">appengine-search
 77 |         github repository</a> under a liberal open source (MIT) license.</p>
 78 |         <form action="/search" method="get">
 79 |             Search for phrase (e.g., 'lorem ipsum'):
 80 |         """
 81 |         page += '<input name="phrase"'
 82 |         phrase = self.request.get('phrase')
 83 |         if phrase:
 84 |             page += ' value="%s">' % (phrase)
 85 |         page += '<input type="submit" name="submitbtn" value="Return Pages">'
 86 |         page += '<input type="submit" name="submitbtn" value="Return Keys Only">'
 87 |         page += """
 88 |         <p><strong>Return Pages</strong> retrieves the entire Page entities.<br />
 89 |            <strong>Return Keys Only</strong> retrieves just the keys but uses
 90 |            intelligent key naming to transmit "Title" data via the key names.</p>
 91 |         """
 92 |         page += '</form>'
 93 |         page += html
 94 |         page += '</body></html>'
 95 |         self.response.out.write(page)
 96 | 
 97 | class MainPage(SimplePage):  # Handles "/": shows the add-page form and accepts submissions.
 98 |     def get(self):  # Render the add-page form, or a login prompt for anonymous visitors.
 99 |         user = users.get_current_user()
100 |         if not user:
101 |             html = '<h4>Please login to add a page.</h4>'
102 |         else:
103 |             import time  # Only needed here, to pre-fill the title field.
104 |             time_string = time.strftime('Page submitted %X on %x')
105 |             html = """
106 |             <h4>Add a text page below:</h4>
107 |             <form action="/" method="post">
108 |                 <div>Title: <input type="text" size="40" name="title" 
109 |             """
110 |             html += 'value="' + time_string + '" />'  # Completes the <input> tag opened above.
111 |             html += """
112 |                 <em>This data will be encoded in the key names of index entities.</em></div>
113 |                 <div><textarea name="content" rows="10" cols="60"></textarea></div>
114 |                 <div><input type="submit" value="Add Page" /></div>
115 |             </form>
116 |             """
117 |         self.render(html)
118 | 
119 |     def post(self):  # Store the submitted page and queue it for search indexing.
120 |         user = users.get_current_user()
121 |         content = self.request.get('content')
122 |         title = self.request.get('title')
123 |         if not user:
124 |             self.redirect('/?msg=You+must+be+logged+in')  # NOTE(review): no visible handler reads `msg`.
125 |         elif not content:
126 |             self.redirect('/')  # Empty submissions are dropped without a message.
127 |         else:
128 |             page = Page(content=content, title=title, user=user)
129 |             page.put()
130 |             page.enqueue_indexing(url=INDEXING_URL)  # Indexing is handed to the handler mapped at INDEXING_URL.
131 |             html = "<div>Thanks for entering the following text:</div>"
132 |             html += "<pre>%s</pre>" % (cgi.escape(content))  # Escape user text before echoing it.
133 |             self.render(html)
134 | 
135 | class SearchPage(SimplePage):
136 |     def get(self):
137 |         submitbtn = self.request.get('submitbtn')
138 |         phrase = self.request.get('phrase')
139 |         html = "<h4>'" + phrase + "' was found on these pages:</h4>"
140 |         if submitbtn == 'Return Keys Only':
141 |             key_list = Page.search(phrase, keys_only=True)
142 |             for key_and_title in key_list:
143 |                 html += "<div><p>Title: %s</p></div>" % key_and_title[1]
144 |         else:
145 |             pages = Page.search(phrase)
146 |             for page in pages:
147 |                 html += "<div><p>Title: %s</p><p>User: %s, Created: %s</p><pre>%s</pre></div>" \
148 |                         % (page.title, str(page.user), str(page.created), cgi.escape(page.content))
149 |         self.render(html)
150 | 
151 | application = webapp.WSGIApplication([  # Routes URL paths to their handlers.
152 |         ('/', MainPage),
153 |         ('/search', SearchPage),
154 |         (INDEXING_URL, search.SearchIndexing)], debug=True)  # NOTE(review): debug=True -- confirm this is intended outside development.
155 | 
156 | def main():  # Serve the request through the WSGI application.
157 |     run_wsgi_app(application)
158 | 
159 | if __name__ == '__main__':
160 |   main()
161 | 


--------------------------------------------------------------------------------
/search/__init__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # The MIT License
  4 | # 
  5 | # Copyright (c) 2009 William T. Katz
  6 | # Website/Contact: http://www.billkatz.com
  7 | # 
  8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | # of this software and associated documentation files (the "Software"), to 
 10 | # deal in the Software without restriction, including without limitation 
 11 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 12 | # and/or sell copies of the Software, and to permit persons to whom the 
 13 | # Software is furnished to do so, subject to the following conditions:
 14 | # 
 15 | # The above copyright notice and this permission notice shall be included in
 16 | # all copies or substantial portions of the Software.
 17 | # 
 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 23 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 24 | # DEALINGS IN THE SOFTWARE.
 25 | 
 26 | """A simple full-text search system
 27 | 
 28 | This module lets you designate particular entities for full text search
 29 | indexing.  It uses the Task Queue API to schedule search indexing and
 30 | relation index entities (as described in Brett Slatkin's 'Building Scalable,
 31 | Complex Apps on App Engine' talk at Google I/O, 2009).
 32 | 
 33 | The keyword extraction code was slightly modified from Ryan Barrett's
 34 | SearchableModel implementation.
 35 | """
 36 | __author__ = 'William T. Katz'
 37 | 
 38 | import logging
 39 | import re
 40 | import string
 41 | import sys
 42 | 
 43 | from google.appengine.api import datastore
 44 | from google.appengine.api import datastore_types
 45 | from google.appengine.ext import db
 46 | from google.appengine.ext import webapp
 47 | 
 48 | # TODO -- This will eventually be moved out of labs namespace
 49 | from google.appengine.api.labs import taskqueue
 50 | 
 51 | # Use python port of Porter2 stemmer.
 52 | from search.pyporter2 import Stemmer
 53 | 
 54 | class Error(Exception):
 55 |     """Base class for errors raised by the search module."""
 56 | 
 57 | class IndexTitleError(Error):
 58 |     """Raised when INDEX_TITLE_FROM_PROP is missing or a title alteration is invalid."""
 59 | 
 60 | # Following module-level constants are cached in instance
 61 | 
 62 | KEY_NAME_DELIMITER = '||'  # Separates the fields packed into index key names.
 63 |                            # Should not be contained in derived class key names.
 64 | 
 65 | MAX_ENTITY_SEARCH_PHRASES = datastore._MAX_INDEXED_PROPERTIES - 1  # NOTE(review): private SDK constant; the -1 presumably reserves a slot for parent_kind -- confirm.
 66 | 
 67 | SEARCH_PHRASE_MIN_LENGTH = 4  # Single words shorter than this are neither indexed nor used as keywords.
 68 | 
 69 | STOP_WORDS = frozenset([
 70 |  'a', 'about', 'according', 'accordingly', 'affected', 'affecting', 'after',
 71 |  'again', 'against', 'all', 'almost', 'already', 'also', 'although',
 72 |  'always', 'am', 'among', 'an', 'and', 'any', 'anyone', 'apparently', 'are',
 73 |  'arise', 'as', 'aside', 'at', 'away', 'be', 'became', 'because', 'become',
 74 |  'becomes', 'been', 'before', 'being', 'between', 'both', 'briefly', 'but',
 75 |  'by', 'came', 'can', 'cannot', 'certain', 'certainly', 'could', 'did', 'do',
 76 |  'does', 'done', 'during', 'each', 'either', 'else', 'etc', 'ever', 'every',
 77 |  'following', 'for', 'found', 'from', 'further', 'gave', 'gets', 'give',
 78 |  'given', 'giving', 'gone', 'got', 'had', 'hardly', 'has', 'have', 'having',
 79 |  'here', 'how', 'however', 'i', 'if', 'in', 'into', 'is', 'it', 'itself',
 80 |  'just', 'keep', 'kept', 'knowledge', 'largely', 'like', 'made', 'mainly',
 81 |  'make', 'many', 'might', 'more', 'most', 'mostly', 'much', 'must', 'nearly',
 82 |  'necessarily', 'neither', 'next', 'no', 'none', 'nor', 'normally', 'not',
 83 |  'noted', 'now', 'obtain', 'obtained', 'of', 'often', 'on', 'only', 'or',
 84 |  'other', 'our', 'out', 'owing', 'particularly', 'past', 'perhaps', 'please',
 85 |  'poorly', 'possible', 'possibly', 'potentially', 'predominantly', 'present',
 86 |  'previously', 'primarily', 'probably', 'prompt', 'promptly', 'put',
 87 |  'quickly', 'quite', 'rather', 'readily', 'really', 'recently', 'regarding',
 88 |  'regardless', 'relatively', 'respectively', 'resulted', 'resulting',
 89 |  'results', 'said', 'same', 'seem', 'seen', 'several', 'shall', 'should',
 90 |  'show', 'showed', 'shown', 'shows', 'significantly', 'similar', 'similarly',
 91 |  'since', 'slightly', 'so', 'some', 'sometime', 'somewhat', 'soon',
 92 |  'specifically', 'state', 'states', 'strongly', 'substantially',
 93 |  'successfully', 'such', 'sufficiently', 'than', 'that', 'the', 'their',
 94 |  'theirs', 'them', 'then', 'there', 'therefore', 'these', 'they', 'this',
 95 |  'those', 'though', 'through', 'throughout', 'to', 'too', 'toward', 'under',
 96 |  'unless', 'until', 'up', 'upon', 'use', 'used', 'usefully', 'usefulness',
 97 |  'using', 'usually', 'various', 'very', 'was', 'we', 'were', 'what', 'when',
 98 |  'where', 'whether', 'which', 'while', 'who', 'whose', 'why', 'widely',
 99 |  'will', 'with', 'within', 'without', 'would', 'yet', 'you'])
100 | 
101 | PUNCTUATION_REGEX = re.compile('[' + re.escape(string.punctuation) + ']')  # Matches one character of string.punctuation.
102 | 
103 | # Rather than have an extra property name to distinguish stemmed from
104 | # non-stemmed index entities, we use different Models that are
105 | # identical to a base index entity.
106 | class SearchIndex(db.Model):
107 |     """Holds full text indexing on an entity.
108 |     
109 |     This model is used by the Searchable mix-in to hold full text
110 |     indexes of a parent entity.
111 |     """
112 |     @staticmethod
113 |     def get_index_key_name(parent, index_num=1):
114 |         key = parent.key()
115 |         title = key.kind() + ' ' + str(key.id_or_name())
116 |         uniq_key = title + KEY_NAME_DELIMITER + str(index_num)
117 |         if hasattr(parent, 'INDEX_TITLE_FROM_PROP'):
118 |             logging.debug("Getting key name from property '%s'", parent.INDEX_TITLE_FROM_PROP)
119 |             if hasattr(parent, parent.INDEX_TITLE_FROM_PROP):
120 |                 title = getattr(parent, parent.INDEX_TITLE_FROM_PROP) or title
121 |         return uniq_key + KEY_NAME_DELIMITER + title
122 | 
123 |     @staticmethod
124 |     def get_title(key_name=''):
125 |         frags = key_name.split(KEY_NAME_DELIMITER)
126 |         if len(frags) < 3:
127 |             return 'Unknown Title'
128 |         else:
129 |             return frags[2]
130 | 
131 |     @staticmethod
132 |     def get_index_num(key_name=''):
133 |         frags = key_name.split(KEY_NAME_DELIMITER)
134 |         if len(frags) < 2:
135 |             return '1'
136 |         else:
137 |             return frags[1]
138 | 
139 |     @classmethod
140 |     def put_index(cls, parent, phrases, index_num=1):
141 |         parent_key = parent.key()
142 |         args = {'key_name': cls.get_index_key_name(parent, index_num),
143 |                 'parent': parent_key, 'parent_kind': parent_key.kind(), 
144 |                 'phrases': phrases }
145 |         return cls(**args).put()
146 | 
147 | 
148 | class LiteralIndex(SearchIndex):
149 |     """Index model for non-inflected search phrases."""
150 |     parent_kind = db.StringProperty(required=True)  # Kind of the indexed parent entity.
151 |     phrases = db.StringListProperty(required=True)  # Phrases matched by 'phrases =' query filters.
152 | 
153 | 
154 | class StemmedIndex(SearchIndex):
155 |     """Index model for stemmed (inflected) search phrases."""
156 |     parent_kind = db.StringProperty(required=True)  # Kind of the indexed parent entity.
157 |     phrases = db.StringListProperty(required=True)  # Phrases matched by 'phrases =' query filters.
158 | 
159 | 
160 | class Searchable(object):
161 |     """A class that supports full text indexing and search on entities.
162 |     
163 |     Add this class to your model's inheritance declaration like this:
164 |     
165 |         class Page(Searchable, db.Model):
166 |             title = db.StringProperty()
167 |             author_name = db.StringProperty()
168 |             content = db.TextProperty()
169 |             INDEX_TITLE_FROM_PROP = 'title'
170 |             # INDEX_STEMMING = False
171 |             # INDEX_USES_MULTI_ENTITIES = False
172 |             # INDEX_MULTI_WORD = False
173 |             # INDEX_ONLY = ['content']
174 | 
175 |     There are a few class variables that can be overridden by your Model.
176 |     The settings were made class variables because their use should be
177 |     declared at Model definition.
178 | 
179 |     You can declare a string property to be stowed in index key names by
180 |     using the INDEX_TITLE_FROM_PROP variable.  This allows you to retrieve
181 |     useful labels on key-only searches without doing a get() on the whole 
182 |     entity.
183 | 
184 |     Defaults are for searches to use stemming, multiple index entities,
185 |     and index all basestring-derived properties.  Also, two and three-word
186 |     phrases are inserted into the index, which can be disabled by setting
187 |     INDEX_MULTI_WORD to False.
188 | 
189 |     Stemming is on by default but can be toggled off by setting INDEX_STEMMING
190 |     to False in your class declaration.
191 | 
192 |     You can set a class variable INDEX_ONLY to a list of property names
193 |     for indexing.  If INDEX_ONLY is not None, only those properties named
194 |     in the list will be indexed.
195 | 
196 |     Because most search phrase lists generated from an entity will be under
197 |     the approximately 5000 indexed property limit, you can make indexing
198 |     more efficient by setting INDEX_USES_MULTI_ENTITIES to False if you know
199 |     your indexed content will be relatively small (or you don't care about
200 |     some false negatives).  When INDEX_USES_MULTI_ENTITIES is True (default),
201 |     there is slight overhead on every indexing operation because
202 |     we must query for all index entities and delete unused ones.  In the
203 |     case of a single index entity, it can be simply overwritten.
204 | 
205 |     The enqueue_indexing() method should be called after your model is created or
206 |     edited:
207 | 
208 |         myPage = Page(author_name='John Doe', content='My amazing content!')
209 |         myPage.put()
210 |         myPage.enqueue_indexing(url='/tasks/searchindexing')
211 | 
212 |     Note that a url must be included that corresponds with the url mapped
213 |     to the search.SearchIndexing handler.
214 | 
215 |     You can limit the properties indexed by passing in a list of 
216 |     property names:
217 | 
218 |         myPage.enqueue_indexing(url='/foo', only_index=['content'])
219 | 
220 |     If you want to risk getting a timeout during indexing, you could
221 |     index immediately after putting your model and forego task queueing:
222 | 
223 |         myPage.put()
224 |         myPage.index()
225 | 
226 |     After your model has been indexed, you may use the search() method:
227 | 
228 |         Page.search('search phrase')          # -> Returns Page entities
229 |         Page.search('stuff', keys_only=True)  # -> Returns Page keys
230 | 
231 |     In the case of multi-word search phrases like the first example above,
232 |     the search will first list keys that match the full phrase and then
233 |     list keys that match the AND of individual keywords.  Note that when
234 |     INDEX_USES_MULTI_ENTITIES is True (default), if a Page's index is spread
235 |     over multiple index entities, the keyword AND portion of the
236 |     search may fail, i.e., there will be false negative search results.
237 | 
238 |     You can use the full_text_search() static method to return all entities,
239 |     not just a particular kind, that have been indexed:
240 | 
241 |         Searchable.full_text_search('stuff')  # -> Returns any entities
242 |         Searchable.full_text_search('stuff', stemming=False)
243 | 
244 |     Because stemming can be toggled for any particular Model, only entities will
245 |     be returned that match indexing style (i.e., stemming on or off).
246 |     """
247 | 
248 |     INDEX_ONLY = None           # Can set to list of property names to index.
249 |     INDEX_STEMMING = True       # Allow stemming to be turned off per subclass.
250 |     INDEX_MULTI_WORD = True     # Add two and three-word phrases to index.
251 | 
252 |     # If TRUE, incurs additional query/delete overhead on indexing but will workaround
253 |     # indexed properties limit (MAX_ENTITY_SEARCH_PHRASES)
254 |     INDEX_USES_MULTI_ENTITIES = True
255 | 
256 |     @staticmethod
257 |     def full_text_search(phrase, limit=10, 
258 |                          kind=None, 
259 |                          stemming=INDEX_STEMMING,
260 |                          multi_word_literal=INDEX_MULTI_WORD):
261 |         """Queries search indices for phrases using a merge-join.
262 |         
263 |         Args:
264 |             phrase: String.  Search phrase.
265 |             kind: String.  Returned keys/entities are restricted to this kind.
266 | 
267 |         Returns:
268 |             A list of (key, title) tuples corresponding to the indexed entities.  
269 |             Multi-word literal matches are returned first.
270 | 
271 |         TODO -- Should provide feedback if input search phrase has stop words, etc.
272 |         """
273 |         index_keys = []
274 |         keywords = PUNCTUATION_REGEX.sub(' ', phrase).lower().split()
275 |         if stemming:
276 |             stemmer = Stemmer.Stemmer('english')
277 |             klass = StemmedIndex
278 |         else:
279 |             klass = LiteralIndex
280 | 
281 |         if len(keywords) > 1 and multi_word_literal:
282 |             # Try to match literal multi-word phrases first
283 |             if len(keywords) == 2:
284 |                 search_phrases = [' '.join(keywords)]
285 |             else:
286 |                 search_phrases = []
287 |                 sub_strings = len(keywords) - 2
288 |                 keyword_not_stop_word = map(lambda x: x not in STOP_WORDS, keywords)
289 |                 for pos in xrange(0, sub_strings):
290 |                     if keyword_not_stop_word[pos] and keyword_not_stop_word[pos+2]:
291 |                         search_phrases.append(' '.join(keywords[pos:pos+3]))
292 |             query = klass.all(keys_only=True)
293 |             for phrase in search_phrases:
294 |                 if stemming:
295 |                     phrase = stemmer.stemWord(phrase)
296 |                 query = query.filter('phrases =', phrase)
297 |             if kind:
298 |                 query = query.filter('parent_kind =', kind)
299 |             index_keys = query.fetch(limit=limit)
300 | 
301 |         if len(index_keys) < limit:
302 |             new_limit = limit - len(index_keys)
303 |             keywords = filter(lambda x: len(x) >= SEARCH_PHRASE_MIN_LENGTH, keywords)
304 |             if stemming:
305 |                 keywords = stemmer.stemWords(keywords)
306 |             query = klass.all(keys_only=True)
307 |             for keyword in keywords:
308 |                 query = query.filter('phrases =', keyword)
309 |             if kind:
310 |                 query = query.filter('parent_kind =', kind)
311 |             single_word_matches = [key for key in query.fetch(limit=new_limit) \
312 |                                    if key not in index_keys]
313 |             index_keys.extend(single_word_matches)
314 | 
315 |         return [(key.parent(), SearchIndex.get_title(key.name())) for key in index_keys]
316 | 
317 |     @classmethod
318 |     def get_simple_search_phraseset(cls, text):
319 |         """Returns a simple set of keywords from given text.
320 | 
321 |         Args:
322 |             text: String.
323 | 
324 |         Returns:
325 |             A set of keywords that aren't stop words and meet length requirement.
326 | 
327 |         >>> Searchable.get_simple_search_phraseset('I shall return.')
328 |         set(['return'])
329 |         """
330 |         if text:
331 |             datastore_types.ValidateString(text, 'text', max_len=sys.maxint)
332 |             text = PUNCTUATION_REGEX.sub(' ', text)
333 |             words = text.lower().split()
334 |             words = set(words)
335 |             words -= STOP_WORDS
336 |             for word in list(words):
337 |                 if len(word) < SEARCH_PHRASE_MIN_LENGTH:
338 |                     words.remove(word)
339 |         else:
340 |             words = set()
341 |         return words
342 | 
343 |     @classmethod
344 |     def get_search_phraseset(cls, text):
345 |         """Returns set of phrases, including two and three adjacent word phrases 
346 |            not spanning punctuation or stop words.
347 | 
348 |         Args:
349 |             text: String with punctuation.
350 | 
351 |         Returns:
352 |             A set of search terms that aren't stop words and meet length 
353 |             requirement.  Set includes phrases of adjacent words that
354 |             aren't stop words.  (Stop words are allowed in middle of three-word
355 |             phrases like "Statue of Liberty".)
356 | 
357 |         >>> Searchable.get_search_phraseset('You look through rosy-colored glasses.')
358 |         set(['look through rosy', 'rosy colored', 'colored', 'colored glasses', 'rosy', 'rosy colored glasses', 'glasses', 'look'])
359 |         >>> Searchable.get_search_phraseset('I saw the Statue of Liberty.')
360 |         set(['saw the statue', 'statue of liberty', 'liberty', 'statue'])
361 |         >>> Searchable.get_search_phraseset('Recalling friends, past and present.')
362 |         set(['recalling', 'recalling friends', 'friends'])
363 |         """
364 |         if text:
365 |             datastore_types.ValidateString(text, 'text', max_len=sys.maxint)
366 |             text = text.lower()
367 |             phrases = []
368 |             two_words = []
369 |             three_words = ['', '']
370 |             three_words_no_stop = [False, False]
371 |             text = text.replace('-', ' ')
372 |             fragments = text.split()
373 |             for frag in fragments:
374 |                 word, replaced = PUNCTUATION_REGEX.subn('', frag)
375 |                 not_end_punctuation = (replaced > 1 or frag[-1] not in string.punctuation)
376 |                 if replaced and not_end_punctuation:
377 |                     two_words = []
378 |                     three_words = ['', '']
379 |                 three_words.append(word)  # We allow stop words in middle
380 |                 if word in STOP_WORDS:
381 |                     two_words = []
382 |                     three_words_no_stop.append(False)
383 |                 else:
384 |                     two_words.append(word)
385 |                     three_words_no_stop.append(True)
386 |                     if len(word) >= SEARCH_PHRASE_MIN_LENGTH:
387 |                         phrases.append(word)
388 |                     if len(two_words) == 2:
389 |                         phrases.append(' '.join(two_words))
390 |                         del two_words[0]
391 |                     if len(three_words) == 3 and three_words_no_stop[0]:
392 |                         phrases.append(' '.join(three_words))
393 |                 del three_words[0]
394 |                 del three_words_no_stop[0]
395 |             phrases = set(phrases)
396 |         else:
397 |             phrases = set()
398 |         return phrases
399 | 
400 |     @classmethod
401 |     def search(cls, phrase, limit=10, keys_only=False):
402 |         """Queries search indices for phrases using a merge-join.
403 |         
404 |         Use of this class method lets you easily restrict searches to a kind
405 |         and retrieve entities or keys.
406 | 
407 |         Args:
408 |             phrase: Search phrase (string)
409 |             limit: Number of entities or keys to return.
410 |             keys_only: If True, return only keys with title of parent entity.
411 |         
412 |         Returns:
413 |             A list.  If keys_only is True, the list holds (key, title) tuples.
414 |             If keys_only is False, the list holds Model instances.
415 |         """
416 |         key_list = Searchable.full_text_search(
417 |                         phrase, limit=limit, kind=cls.kind(),
418 |                         stemming=cls.INDEX_STEMMING, 
419 |                         multi_word_literal=cls.INDEX_MULTI_WORD)
420 |         if keys_only:
421 |             logging.debug("key_list: %s", key_list)
422 |             return key_list
423 |         else:
424 |             return [cls.get(key_and_title[0]) for key_and_title in key_list]
425 | 
426 |     def indexed_title_changed(self):
427 |         """Renames index entities for this model to match new title."""
428 |         klass = StemmedIndex if self.INDEX_STEMMING else LiteralIndex
429 |         query = klass.all(keys_only=True).ancestor(self.key())
430 |         old_index_keys = query.fetch(1000)
431 |         if not hasattr(self, 'INDEX_TITLE_FROM_PROP'):
432 |             raise IndexTitleError('Must declare a property name via INDEX_TITLE_FROM_PROP')
433 |         new_keys = []
434 |         for old_key in old_index_keys:
435 |             old_index = db.get(old_key)
436 |             index_num = SearchIndex.get_index_num(old_key.name())
437 |             index_key = klass.put_index(parent=self, index_num=index_num,
438 |                                         phrases=old_index.phrases)
439 |             new_keys.append(index_key)
440 |         delete_keys = filter(lambda key: key not in new_keys, old_index_keys)
441 |         db.delete(delete_keys)
442 | 
443 |     def get_search_phrases(self, indexing_func=None):
444 |         """Returns search phrases from properties in a given Model instance.
445 | 
446 |         Args (optional):
447 |             only_index: List of strings.  Restricts indexing to these property names.
448 |             indexing_func: A function that returns a set of keywords or phrases.
449 | 
450 |         Note that the indexing_func can be passed in to allow more customized
451 |         search phrase generation.
452 | 
453 |         Two model variables influence the output of this method:
454 |             INDEX_ONLY: If None, all indexable properties are indexed.
455 |                 If a list of property names, only those properties are indexed.
456 |             INDEX_MULTI_WORD: Class variable that allows multi-word search
457 |                 phrases like "statue of liberty."
458 |             INDEX_STEMMING: Returns stemmed phrases.
459 |         """
460 |         if not indexing_func:
461 |             klass = self.__class__
462 |             if klass.INDEX_MULTI_WORD:
463 |                 indexing_func = klass.get_search_phraseset
464 |             else:
465 |                 indexing_func = klass.get_simple_search_phraseset
466 |         if self.INDEX_STEMMING:
467 |             stemmer = Stemmer.Stemmer('english')
468 |         phrases = set()
469 |         for prop_name, prop_value in self.properties().iteritems():
470 |             if (not self.INDEX_ONLY) or (prop_name in self.INDEX_ONLY):
471 |                 values = prop_value.get_value_for_datastore(self)
472 |                 if not isinstance(values, list):
473 |                     values = [values]
474 |                 if (isinstance(values[0], basestring) and
475 |                         not isinstance(values[0], datastore_types.Blob)):
476 |                     for value in values:
477 |                         words = indexing_func(value)
478 |                         if self.INDEX_STEMMING:
479 |                             stemmed_words = set(stemmer.stemWords(words))
480 |                             phrases.update(stemmed_words)
481 |                         else:
482 |                             phrases.update(words)
483 |         return list(phrases)
484 | 
485 |     def index(self, indexing_func=None):
486 |         """Generates or replaces a search entities for a Model instance.
487 | 
488 |         Args (optional):
489 |             indexing_func: A function that returns a set of keywords or phrases.
490 | 
491 |         Note that the indexing_func can be passed in to allow more customized
492 |         search phrase generation.
493 |         """
494 |         search_phrases = self.get_search_phrases(indexing_func=indexing_func)
495 | 
496 |         key = self.key()
497 |         klass = StemmedIndex if self.INDEX_STEMMING else LiteralIndex
498 | 
499 |         if self.__class__.INDEX_USES_MULTI_ENTITIES:
500 |             query = klass.all(keys_only=True).ancestor(key)
501 |             previous_index_keys = query.fetch(1000)
502 |         num_phrases = len(search_phrases)
503 | 
504 |         start_index = 0
505 |         entity_num = 1      # Appended to key name of index entity
506 |         index_keys = []
507 |         while (num_phrases > 0):
508 |             cur_num_phrases = min(num_phrases, MAX_ENTITY_SEARCH_PHRASES)
509 |             end_index = start_index + cur_num_phrases
510 |             num_indices = (num_phrases - 1) / MAX_ENTITY_SEARCH_PHRASES + 1
511 |             index_key = klass.put_index(parent=self, index_num=entity_num,
512 |                                         phrases=search_phrases[start_index:end_index])
513 |             index_keys.append(index_key)
514 |             if self.__class__.INDEX_USES_MULTI_ENTITIES:
515 |                 start_index = end_index
516 |                 num_phrases -= cur_num_phrases
517 |                 entity_num += 1
518 |             else:
519 |                 num_phrases = 0    # Only write one index entity
520 |         if self.__class__.INDEX_USES_MULTI_ENTITIES:
521 |             delete_keys = []
522 |             for key in previous_index_keys:
523 |                 if key not in index_keys:
524 |                     delete_keys.append(key)
525 |             db.delete(delete_keys)
526 | 
527 |     def enqueue_indexing(self, url, only_index=None):
528 |         """Adds an indexing task to the default task queue.
529 |         
530 |         Args:
531 |             url: String. The url associated with LiteralIndexing handler.
532 |             only_index: List of strings.  Restricts indexing to these prop names.
533 |         """
534 |         if url:
535 |             params = {'key': str(self.key())}
536 |             if only_index:
537 |                 params['only_index'] = ' '.join(only_index)
538 |             taskqueue.add(url=url, params=params)
539 | 
class SearchIndexing(webapp.RequestHandler):
    """Handler for full text indexing task.

    Expects a POST whose 'key' parameter is the string form of the key of
    the entity to index.  The 'only_index' parameter is not applied here:
    index() accepts no such argument and reads the restriction from the
    model's INDEX_ONLY instead.
    """
    def post(self):
        key_str = self.request.get('key')
        if not key_str:
            return
        try:
            key = db.Key(key_str)
        except db.BadKeyError:
            # A malformed key can never succeed, so answer 200 to clear the
            # task instead of letting the error propagate.
            logging.warning("Ignoring indexing task with invalid key: %s",
                            key_str)
            self.response.set_status(200)
            return
        entity = db.get(key)
        if not entity:
            self.response.set_status(200)   # Clear task because it's a bad key
            return
        entity.index()
553 | 
554 | 


--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DocSavage/appengine-search/daf2d12bbbb30d1ee5871c3d3c791625ac2c77c8/static/favicon.ico


--------------------------------------------------------------------------------
/static/robots.txt:
--------------------------------------------------------------------------------
1 | User-agent: *
2 | Disallow:


--------------------------------------------------------------------------------
/tests/HOW-TO-TEST:
--------------------------------------------------------------------------------
 1 | Testing a Google App Engine app
 2 | ===============================
 3 | 
 4 | This app uses the nose testing framework.  To get started, make sure
 5 | you've installed the necessary components:
 6 | 
 7 | sudo easy_install nose
 8 | sudo easy_install NoseGAE
 9 | 
10 | Download WebTest (http://pythonpaste.org/webtest).
11 | cd into webtest directory then:
12 | sudo python setup.py build
13 | sudo python setup.py install
14 | 
15 | If all the components have been downloaded and installed correctly,
16 | you'll be able to run tests from the command line:
17 | 
18 | nosetests -v --with-gae --with-doctest
19 | 
20 | The above will run doctests embedded in source files as well as
21 | test scripts in the /tests directory.


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | # This setup is necessary to prevent exception from being thrown by users API.
 4 | # Eventually, it should be incorporated into NoseGAE plugin.
 5 | def setup():
 6 |     os.environ['AUTH_DOMAIN'] = 'example.org'
 7 |     os.environ['USER_EMAIL'] = ''
 8 | 
 9 | def teardown():
10 |     pass
11 | 
12 | 


--------------------------------------------------------------------------------
/tests/test_app.py:
--------------------------------------------------------------------------------
1 | from webtest import TestApp
2 | from main import application
3 | 
# WebTest wrapper around the application object imported from main;
# the tests below issue their requests through it.
app = TestApp(application)
5 | 
def test_index():
    """The root URL responds with the expected marker text."""
    page = app.get('/')
    assert 'Full Text Search Test' in str(page)
9 | 


--------------------------------------------------------------------------------
/tests/test_search.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # The MIT License
  4 | # 
  5 | # Copyright (c) 2009 William T. Katz
  6 | # Website/Contact: http://www.billkatz.com
  7 | # 
  8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | # of this software and associated documentation files (the "Software"), to 
 10 | # deal in the Software without restriction, including without limitation 
 11 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 12 | # and/or sell copies of the Software, and to permit persons to whom the 
 13 | # Software is furnished to do so, subject to the following conditions:
 14 | # 
 15 | # The above copyright notice and this permission notice shall be included in
 16 | # all copies or substantial portions of the Software.
 17 | # 
 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 23 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 24 | # DEALINGS IN THE SOFTWARE.
 25 | 
 26 | import re
 27 | import os
 28 | 
# Latin filler text used as page content by the literal (non-stemmed) search
# tests; it ends with the English word "Encrusted".
LOREM_IPSUM = """
Lorem ipsum dolor sit amet, consectetur adipisicing elit, 
sed do eiusmod tempor incididunt ut labore et dolore magna 
aliqua. Ut enim ad minim veniam, quis nostrud exercitation 
ullamco laboris nisi ut aliquip ex ea commodo consequat. 
Duis aute irure dolor in reprehenderit in voluptate velit 
esse cillum dolore eu fugiat nulla pariatur. Excepteur sint 
occaecat cupidatat non proident, sunt in culpa qui officia 
deserunt mollit anim id est laborum.  Encrusted.
"""

# English passage containing inflected words ("algorithms", "pythonic",
# "ruby-encrusted") that the stemmed search tests query by other word forms.
INFLECTION_TEST = """
Guido ran up slippery ruby-encrusted monoliths in search of
the serpentine mascot.  The pythonic creatures skulked away.
How quickly did they forget their master?  Guido was
challenged by the excessively poor storyline in this fictional
tale, but alas, what could he do?  He was one of many fixtures
in ornately narrated prose doomed to be read only by
computerized algorithms implementing text processing!
"""
 49 | 
 50 | from google.appengine.ext import db
 51 | import search
 52 | 
 53 | from google.appengine.api import apiproxy_stub_map
 54 | from google.appengine.api import datastore_file_stub
 55 | 
 56 | def clear_datastore():
 57 |     """Clear datastore.  Can be used between tests to insure empty datastore.
 58 |     
 59 |     See code.google.com/p/nose-gae/issues/detail?id=16
 60 |     Note: the appid passed to DatastoreFileStub should match the app id in your app.yaml.
 61 |     """
 62 |     apiproxy_stub_map.apiproxy = apiproxy_stub_map.APIProxyStubMap()
 63 |     stub = datastore_file_stub.DatastoreFileStub('billkatz-test', '/dev/null', '/dev/null')
 64 |     apiproxy_stub_map.apiproxy.RegisterStub('datastore_v3', stub)
 65 | 
class Page(search.Searchable, db.Model):
    """Searchable test model that overrides no indexing options except the title source."""
    author_name = db.StringProperty()
    title = db.StringProperty()
    content = db.TextProperty()
    # Property named as the title source; the key-only search tests expect
    # this value to come back as the title in (key, title) results.
    INDEX_TITLE_FROM_PROP = 'title'
 71 | 
class NoninflectedPage(search.Searchable, db.Model):
    """Used to test search without stemming, e.g. for precise, non-inflected words"""
    author_name = db.StringProperty()
    content = db.TextProperty()
    # Turn stemming off so LiteralIndex entities are written instead of
    # StemmedIndex ones.
    INDEX_STEMMING = False
    # Only 'content' is indexed; author_name is left out of the index.
    INDEX_ONLY = ['content']
 78 | 
 79 | class TestMisc:
 80 |     def setup(self):
 81 |         clear_datastore()
 82 | 
 83 |     def test_appostrophed_key(self):
 84 |         page = Page(key_name="Show Don't Tell", author_name="Pro Author",
 85 |                     content="You should always show and not tell through dialogue or narration.")
 86 |         key = page.put()
 87 |         assert str(key.name()) == "Show Don't Tell"
 88 | 
 89 | class TestLoremIpsum:
 90 |     def setup(self):
 91 |         clear_datastore()
 92 |         page = NoninflectedPage(author_name='John Doe', content=LOREM_IPSUM)
 93 |         page.put()
 94 |         page.index()
 95 |         assert search.LiteralIndex.all().count() == 1
 96 |         page = NoninflectedPage(author_name='Jon Favreau', 
 97 |                                 content='A director that works well with writers.')
 98 |         page.put()
 99 |         page.index()
100 |         assert search.LiteralIndex.all().count() == 2
101 | 
102 |     def teardown(self):
103 |         pass
104 | 
105 |     def test_only_index(self):
106 |         returned_pages = NoninflectedPage.search('John')  # Only 'content' is indexed.
107 |         assert not returned_pages
108 |         returned_pages = NoninflectedPage.search('lorem ipsum')
109 |         assert returned_pages
110 | 
111 |     def test_two_word_search(self):
112 |         returned_pages = NoninflectedPage.search('LoReM IpSuM')
113 |         assert returned_pages and len(returned_pages) == 1
114 |         lmatch = re.search(r'lorem', returned_pages[0].content, re.IGNORECASE)
115 |         imatch = re.search(r'ipsum', returned_pages[0].content, re.IGNORECASE)
116 |         assert lmatch and imatch
117 | 
118 |     def test_key_only_search(self):
119 |         key_list = NoninflectedPage.search('LoReM ipsum', keys_only=True)
120 |         assert isinstance(key_list, list) and len(key_list) == 1
121 |         assert isinstance(key_list[0][0], db.Key)
122 |         assert isinstance(key_list[0][1], basestring)
123 | 
124 |     def test_search_miss(self):
125 |         returned_pages = NoninflectedPage.search('NowhereInDoc')
126 |         assert not returned_pages
127 |         returned_pages = NoninflectedPage.search('director')
128 |         assert returned_pages
129 |         lmatch = re.search(r'lorem', returned_pages[0].content, re.IGNORECASE)
130 |         imatch = re.search(r'ipsum', returned_pages[0].content, re.IGNORECASE)
131 |         assert not lmatch and not imatch
132 | 
133 |     def test_not_inflected(self):
134 |         returned_pages = NoninflectedPage.search('encrust')
135 |         assert not returned_pages
136 |         returned_pages = NoninflectedPage.search('encrusted')
137 |         assert returned_pages
138 | 
class TestInflection:
    """Stemmed search: a query matches other inflections of the same word."""

    def setup(self):
        clear_datastore()
        fixtures = (
            ('John Doe', INFLECTION_TEST),
            ('Jon Favreau', 'A director that works well with writers.'),
        )
        for position, (author, text) in enumerate(fixtures):
            page = Page(author_name=author, content=text)
            page.put()
            page.index()
            # Each indexed page should add exactly one StemmedIndex entity.
            assert search.StemmedIndex.all().count() == position + 1

    def test_inflections(self):
        # (query word, word form expected in the content of the first hit)
        word_pairs = [
            ('algorithm', 'algorithms'),
            ('python', 'pythonic'),
            ('rubies', 'ruby'),
            ('encrust', 'encrusted'),
        ]
        for query, expected_form in word_pairs:
            hits = Page.search(query)
            assert hits
            assert re.search(expected_form, hits[0].content, re.IGNORECASE)
160 | 
class TestBigIndex:
    """Indexing of content large enough to need several index entities."""

    def setup(self):
        clear_datastore()

    def test_multientity_index(self):
        """A big page spans several index entities; re-indexing it with
        small content leaves a single index entity."""
        curdir = os.path.abspath(os.path.dirname(__file__))
        bigtextfile = os.path.join(curdir, 'roget.txt')
        import codecs
        bigfile = codecs.open(bigtextfile, 'r', 'utf-8')
        try:
            bigtext = bigfile.read()
        finally:
            # Close the fixture file even if reading or decoding fails.
            bigfile.close()
        words_to_use = 4 * search.MAX_ENTITY_SEARCH_PHRASES
        words = bigtext.split()
        # NOTE(review): this class-level flag is not restored after the test.
        Page.INDEX_USES_MULTI_ENTITIES = True
        page = Page(key_name="Foo", content=' '.join(words[0:words_to_use]))
        page.put()
        page.index()
        assert search.StemmedIndex.all().count() > 1
        # Same key name, much smaller content: surplus index entities
        # from the first pass must be gone afterwards.
        page = Page(key_name="Foo", content=INFLECTION_TEST)
        page.put()
        page.index()
        assert search.StemmedIndex.all().count() == 1
182 | 
class TestKeyOnlySearch:
    """Key-only searches and the titles returned with the keys."""

    def setup(self):
        clear_datastore()
        self.pages = [
            dict(key_name='test1',
                 content='This post has no title at all.'),
            dict(key_name='test2',
                 title='Second Post',
                 content='This is some text for the second post.'),
            dict(key_name='test3',
                 title='Third Post',
                 content='This is some text for the third post.  The last post.'),
        ]
        for attributes in self.pages:
            page = Page(**attributes)
            page.put()
            page.index()
        assert search.StemmedIndex.all().count() == 3

    def test_default_titling(self):
        page_list = Page.search('no title', keys_only=True)
        assert len(page_list) == 1
        hit = page_list[0]
        assert hit[0].name() == 'test1'
        assert hit[1] == 'Page test1'  # Default titling

    def test_title_from_parent(self):
        page_list = Page.search('last', keys_only=True)
        assert len(page_list) == 1
        hit = page_list[0]
        assert hit[0].name() == 'test3'
        assert hit[1] == 'Third Post'

    def test_title_change(self):
        pages = Page.search('second post')
        assert len(pages) == 1
        page = pages[0]
        page.title = 'My Great New Title'
        old_key = page.put()
        page.indexed_title_changed()
        # Renaming must not change the number of index entities.
        assert search.StemmedIndex.all().count() == 3
        page_list = Page.search('second post', keys_only=True)
        assert len(page_list) == 1
        hit = page_list[0]
        assert hit[1] == 'My Great New Title'
        assert hit[0].id_or_name() == old_key.id_or_name()
228 | 
229 | class TestMultiWordSearch:
230 |     def setup(self):
231 |         clear_datastore()
232 |         page = Page(key_name='doetext', author_name='John Doe', 
233 |                     content=INFLECTION_TEST)
234 |         page.put()
235 |         page.index()
236 |         assert search.StemmedIndex.all().count() == 1
237 |         page = Page(key_name="statuetext", 
238 |                     author_name='Other Guy', content="""
239 |         This is the time for all good python programmers to check,
240 |         to test, to go forward and throw junk at the code, and in
241 |         so doing, try to find errors.
242 |           -- Unheralded inscription at base of Statue of Liberty
243 |         """)
244 |         page.put()
245 |         page.index()
246 |         assert search.StemmedIndex.all().count() == 2
247 |         page = Page(key_name="statuetext2", 
248 |                     author_name='Another Guy', content="""
249 |         I have seen a statue and it declares there should be
250 |         liberty in the world.
251 |         """)
252 |         page.put()
253 |         page.index()
254 |         assert search.StemmedIndex.all().count() == 3
255 | 
256 |     def test_multiword_search_order(self):
257 |         returned_pages = Page.search('statue of liberty')
258 |         assert len(returned_pages) == 2
259 |         print "Returned pages: %s" % [page.key().name() for page in returned_pages]
260 |         assert returned_pages[0].key().name() == u'statuetext'
261 |         assert returned_pages[1].key().name() == u'statuetext2'
262 | 
263 |     def test_multiword_search_fail(self):
264 |         returned_pages = Page.search('statue of liberty biggy word')
265 |         assert not returned_pages
266 |         
267 |     def test_multiword_search_and(self):
268 |         returned_pages = Page.search('statue of liberty python')
269 |         assert len(returned_pages) == 1
270 |         assert returned_pages[0].key().name() == u'statuetext'
271 | 
272 |     def test_two_word_search(self):
273 |         returned_pages = Page.search('ornately narrated')
274 |         assert len(returned_pages) == 1
275 |         assert returned_pages[0].key().name() == u'doetext'
276 | 
277 |      


--------------------------------------------------------------------------------