├── .gitignore ├── LICENSE ├── README ├── README.md ├── api.py ├── app.yaml.example ├── appengine_console.py ├── avatar.py ├── backstage.py ├── blog.py ├── config.py.example ├── css.py ├── data.py ├── docs └── zh-Hans │ └── Installation.md ├── favorite.py ├── feed.py ├── html5lib ├── __init__.py ├── constants.py ├── filters │ ├── __init__.py │ ├── _base.py │ ├── formfiller.py │ ├── inject_meta_charset.py │ ├── lint.py │ ├── optionaltags.py │ ├── sanitizer.py │ └── whitespace.py ├── html5parser.py ├── ihatexml.py ├── inputstream.py ├── sanitizer.py ├── serializer │ ├── __init__.py │ ├── htmlserializer.py │ └── xhtmlserializer.py ├── tests │ ├── __init__.py │ ├── mockParser.py │ ├── runparsertests.py │ ├── runtests.py │ ├── support.py │ ├── test_encoding.py │ ├── test_formfiller.py │ ├── test_parser.py │ ├── test_parser2.py │ ├── test_sanitizer.py │ ├── test_serializer.py │ ├── test_stream.py │ ├── test_tokenizer.py │ ├── test_treewalkers.py │ ├── test_whitespace_filter.py │ ├── testdata │ │ ├── encoding │ │ │ ├── test-yahoo-jp.dat │ │ │ ├── tests1.dat │ │ │ └── tests2.dat │ │ ├── sanitizer │ │ │ └── tests1.dat │ │ ├── serializer │ │ │ ├── core.test │ │ │ ├── injectmeta.test │ │ │ ├── optionaltags.test │ │ │ ├── options.test │ │ │ └── whitespace.test │ │ ├── sniffer │ │ │ └── htmlOrFeed.json │ │ ├── tokenizer │ │ │ ├── contentModelFlags.test │ │ │ ├── domjs.test │ │ │ ├── entities.test │ │ │ ├── escapeFlag.test │ │ │ ├── namedEntities.test │ │ │ ├── numericEntities.test │ │ │ ├── pendingSpecChanges.test │ │ │ ├── test1.test │ │ │ ├── test2.test │ │ │ ├── test3.test │ │ │ ├── test4.test │ │ │ ├── unicodeChars.test │ │ │ ├── unicodeCharsProblematic.test │ │ │ └── xmlViolation.test │ │ └── tree-construction │ │ │ ├── adoption01.dat │ │ │ ├── adoption02.dat │ │ │ ├── comments01.dat │ │ │ ├── doctype01.dat │ │ │ ├── domjs-unsafe.dat │ │ │ ├── entities01.dat │ │ │ ├── entities02.dat │ │ │ ├── html5test-com.dat │ │ │ ├── inbody01.dat │ │ │ ├── isindex.dat │ │ │ ├── 
pending-spec-changes-plain-text-unsafe.dat │ │ │ ├── pending-spec-changes.dat │ │ │ ├── plain-text-unsafe.dat │ │ │ ├── scriptdata01.dat │ │ │ ├── tables01.dat │ │ │ ├── tests1.dat │ │ │ ├── tests10.dat │ │ │ ├── tests11.dat │ │ │ ├── tests12.dat │ │ │ ├── tests14.dat │ │ │ ├── tests15.dat │ │ │ ├── tests16.dat │ │ │ ├── tests17.dat │ │ │ ├── tests18.dat │ │ │ ├── tests19.dat │ │ │ ├── tests2.dat │ │ │ ├── tests20.dat │ │ │ ├── tests21.dat │ │ │ ├── tests22.dat │ │ │ ├── tests23.dat │ │ │ ├── tests24.dat │ │ │ ├── tests25.dat │ │ │ ├── tests26.dat │ │ │ ├── tests3.dat │ │ │ ├── tests4.dat │ │ │ ├── tests5.dat │ │ │ ├── tests6.dat │ │ │ ├── tests7.dat │ │ │ ├── tests8.dat │ │ │ ├── tests9.dat │ │ │ ├── tests_innerHTML_1.dat │ │ │ ├── tricky01.dat │ │ │ ├── webkit01.dat │ │ │ └── webkit02.dat │ └── tokenizertotree.py ├── tokenizer.py ├── treebuilders │ ├── __init__.py │ ├── _base.py │ ├── dom.py │ ├── etree.py │ ├── etree_lxml.py │ ├── simpletree.py │ └── soup.py ├── treewalkers │ ├── __init__.py │ ├── _base.py │ ├── dom.py │ ├── etree.py │ ├── genshistream.py │ ├── lxmletree.py │ ├── pulldom.py │ ├── simpletree.py │ └── soup.py └── utils.py ├── images.py ├── index.yaml ├── mail.py ├── main.py ├── mapreduce.yaml ├── mapreduce ├── __init__.py ├── base_handler.py ├── context.py ├── control.py ├── handlers.py ├── input_readers.py ├── lib │ ├── __init__.py │ ├── blobstore │ │ ├── __init__.py │ │ └── blobstore.py │ ├── graphy │ │ ├── README │ │ ├── __init__.py │ │ ├── backends │ │ │ ├── __init__.py │ │ │ └── google_chart_api │ │ │ │ ├── __init__.py │ │ │ │ ├── encoders.py │ │ │ │ └── util.py │ │ ├── bar_chart.py │ │ ├── common.py │ │ ├── formatters.py │ │ ├── line_chart.py │ │ ├── pie_chart.py │ │ └── util.py │ ├── key_range │ │ └── __init__.py │ └── simplejson │ │ ├── README │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ └── scanner.py ├── main.py ├── model.py ├── operation │ ├── __init__.py │ ├── counters.py │ └── db.py ├── quota.py ├── static │ ├── 
base.css │ ├── detail.html │ ├── jquery-1.4.2.min.js │ ├── overview.html │ └── status.js ├── status.py └── util.py ├── member.py ├── misc.py ├── money.py ├── my.py ├── notes.py ├── notifications.py ├── page.py ├── place.py ├── queue.py ├── sso.py ├── static ├── css │ ├── desktop │ │ ├── cheat.css │ │ └── style.css │ └── mobile │ │ └── style.css ├── img │ ├── app24.png │ ├── app48.png │ ├── apple-touch-icon.png │ ├── avatar_large.png │ ├── avatar_mini.png │ ├── avatar_normal.png │ ├── bg.jpg │ ├── bg.png │ ├── bg_apple.jpg │ ├── bg_blended.png │ ├── bg_grid.jpg │ ├── bg_grid.png │ ├── bg_grid_2.png │ ├── bg_light.png │ ├── bg_mobile_sponsored.png │ ├── bg_section.png │ ├── bg_top_black.png │ ├── bg_top_light.png │ ├── bird.png │ ├── btc.png │ ├── burn24.png │ ├── burn48.png │ ├── chat.png │ ├── cloud.png │ ├── cloud24.png │ ├── cloud48.png │ ├── cloudapp.png │ ├── dino24.png │ ├── dot.png │ ├── dot_gray.png │ ├── dot_green.png │ ├── dot_orange.png │ ├── dot_red.png │ ├── download.png │ ├── eject24.png │ ├── eject48.png │ ├── empty.png │ ├── favicon.ico │ ├── gear.png │ ├── gear24.png │ ├── gear48.png │ ├── github.png │ ├── google.png │ ├── heart.png │ ├── info24.png │ ├── info_128.png │ ├── linode.png │ ├── location.png │ ├── logo.png │ ├── logo_20110809.png │ ├── logo_2x.png │ ├── logo_livid.png │ ├── me24.png │ ├── mobileme.png │ ├── next.png │ ├── note24.png │ ├── note48.png │ ├── package32.png │ ├── promoted.png │ ├── psn.png │ ├── qbar.png │ ├── rackspacecloud.png │ ├── reply.png │ ├── rss.png │ ├── say.png │ ├── twitter.png │ ├── update48.png │ ├── util24.png │ └── write48.png └── js │ └── jquery.js ├── t.py ├── template.py ├── topic.py ├── tpl ├── api │ ├── currency.json │ ├── error.json │ ├── member.json │ ├── members_show.json │ ├── node.json │ ├── nodes_all.json │ ├── nodes_show.json │ ├── replies_show.json │ ├── site_info.json │ ├── site_stats.json │ ├── topic.txt │ ├── topics_latest.json │ └── topics_show.json ├── desktop │ ├── about.html │ ├── 
access_denied.html │ ├── advertise.html │ ├── advertisers.html │ ├── backstage_home.html │ ├── backstage_member.html │ ├── backstage_members.html │ ├── backstage_minisite.html │ ├── backstage_move_topic.html │ ├── backstage_new_minisite.html │ ├── backstage_new_page.html │ ├── backstage_node.html │ ├── backstage_page.html │ ├── backstage_section.html │ ├── backstage_site.html │ ├── backstage_topic.html │ ├── blog.html │ ├── blog_entry.html │ ├── changes.html │ ├── common │ │ ├── bottom.html │ │ ├── head.html │ │ ├── say.html │ │ ├── top.html │ │ ├── topics.html │ │ ├── topics_index.html │ │ ├── topics_node.html │ │ ├── topics_search.html │ │ └── tweet.html │ ├── edit_reply.html │ ├── edit_topic.html │ ├── faq.html │ ├── forgot.html │ ├── forgot_sent.html │ ├── images_home.html │ ├── images_rules.html │ ├── index.html │ ├── md5.html │ ├── member_home.html │ ├── member_not_found.html │ ├── member_settings.html │ ├── member_settings_avatar.html │ ├── member_settings_password.html │ ├── mission.html │ ├── money_dashboard.html │ ├── my_following.html │ ├── my_nodes.html │ ├── my_topics.html │ ├── new_topic.html │ ├── node.html │ ├── node_graph.html │ ├── node_not_found.html │ ├── notes_edit.html │ ├── notes_home.html │ ├── notes_item.html │ ├── notes_new.html │ ├── notifications.html │ ├── page.html │ ├── place.html │ ├── planes.html │ ├── recent.html │ ├── reset_password.html │ ├── reset_password_ok.html │ ├── rightbar │ │ ├── ad_sample.html │ │ ├── ads.html │ │ ├── ads_3cpeijian.html │ │ ├── ads_angelcrunch.html │ │ ├── ads_appletuan.html │ │ ├── ads_appletuan2.html │ │ ├── ads_iweekly_1.html │ │ ├── ads_iweekly_2.html │ │ ├── ads_jiepang.html │ │ ├── ads_lkdesign.html │ │ ├── ads_netease.html │ │ ├── ads_tangcha.html │ │ ├── ads_tisiwi.html │ │ ├── adsense.html │ │ ├── backstage_latest_members.html │ │ ├── backstage_section_nodes.html │ │ ├── backstage_siblings.html │ │ ├── backstage_site_permissions.html │ │ ├── goodies.html │ │ ├── guide │ │ │ ├── edit.html │ │ │ 
└── member_level.html │ │ ├── latest_nodes.html │ │ ├── new_topic_tips.html │ │ ├── notes_tip.html │ │ ├── recent_nodes.html │ │ ├── stats.html │ │ └── user.html │ ├── search.html │ ├── search_unavailable.html │ ├── signin.html │ ├── signout.html │ ├── signup.html │ ├── template.html │ ├── time.html │ ├── token_not_found.html │ ├── topic.html │ ├── topic_not_found.html │ ├── twitter_api_cheat_sheet.html │ ├── twitter_dm_inbox.html │ ├── twitter_home.html │ ├── twitter_mentions.html │ └── twitter_user.html ├── feed │ ├── index.xml │ ├── notifications.xml │ └── read.xml ├── mail │ └── reset_password.txt ├── mobile │ ├── ads │ │ ├── admob.html │ │ ├── adsense.html │ │ ├── iweekly.html │ │ ├── jiepang.html │ │ ├── netease.html │ │ ├── running.html │ │ └── tangcha.html │ ├── backstage_home.html │ ├── backstage_new_node.html │ ├── backstage_new_section.html │ ├── backstage_node.html │ ├── backstage_section.html │ ├── changes.html │ ├── common │ │ ├── ads.html │ │ ├── adsense.html │ │ ├── bottom.html │ │ ├── head.html │ │ └── top.html │ ├── edit_topic.html │ ├── index.html │ ├── member_home.html │ ├── member_settings.html │ ├── member_settings_avatar.html │ ├── member_settings_password.html │ ├── new_topic.html │ ├── node.html │ ├── node_not_found.html │ ├── notes_edit.html │ ├── notes_home.html │ ├── notes_item.html │ ├── notes_new.html │ ├── notifications.html │ ├── recent.html │ ├── signin.html │ ├── signout.html │ ├── signup.html │ ├── topic.html │ ├── topic_not_found.html │ └── ua.html ├── portion │ ├── home.html │ ├── home_mobile.html │ ├── topic_content.html │ ├── topic_replies.html │ └── topic_replies_mobile.html └── themes │ ├── default │ └── style.css │ ├── fluid │ └── style.css │ └── purple │ └── style.css ├── twitter ├── __init__.py ├── bitly.py ├── oauth.py ├── oauthtwitter.py └── twitter.py ├── txt └── v2ex_home_top.txt ├── v2ex ├── __init__.py ├── babel │ ├── __init__.py │ ├── da │ │ └── __init__.py │ ├── ext │ │ ├── __init__.py │ │ ├── bleach │ │ │ ├── 
__init__.py │ │ │ ├── encoding.py │ │ │ └── sanitizer.py │ │ ├── captcha.py │ │ ├── cookies.py │ │ ├── sessions.py │ │ └── upyun.py │ ├── handlers │ │ └── __init__.py │ ├── l10n │ │ ├── __init__.py │ │ └── messages │ │ │ ├── __init__.py │ │ │ ├── en.py │ │ │ ├── zhHans.py │ │ │ └── zhHant.py │ ├── security │ │ └── __init__.py │ └── ua │ │ └── __init__.py └── templatetags │ ├── __init__.py │ └── filters.py └── xmpp.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | *.pyo 4 | .bzr 5 | .bzrignore 6 | psd 7 | config.py 8 | app.yaml 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010, Xin Liu 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | * Neither the name of the nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 9 | 10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Project Babel 2 | 3 | Project Babel is a software package for running a community on Google App Engine. It's written in Python and released under a very liberal BSD license. You can use it, modify it or even sell it in whatever way you want. 4 | 5 | Features: 6 | 7 | - Topics are organized under Nodes (Discussion Areas); you can have many Nodes in one community 8 | - Nodes can have header, footer and category properties, or be organized under Sections 9 | - Two clean themes: one for desktop browsers, another for iOS and Android devices 10 | - Optimized for modern browsers 11 | - Built-in WebDAV avatar facility, you can host all avatars with MobileMe or other WebDAV servers 12 | - Atom feed output 13 | - HTML5 14 | - Built-in MapReduce tasks for optimizing community data 15 | - Built-in OAuth Twitter client for tweeting and syncing topics/replies 16 | - Built-in Notes feature 17 | - Gravatar support -------------------------------------------------------------------------------- /appengine_console.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import code 3 | import getpass 4 | import sys 5 | 6 | sys.path.append("/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine") 
7 | sys.path.append("/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/yaml/lib") 8 | 9 | from google.appengine.ext.remote_api import remote_api_stub 10 | from google.appengine.ext import db 11 | 12 | def auth_func(): 13 | return raw_input('Username:'), getpass.getpass('Password:') 14 | 15 | if len(sys.argv) < 2: 16 | print "Usage: %s app_id [host]" % (sys.argv[0],) 17 | app_id = sys.argv[1] 18 | if len(sys.argv) > 2: 19 | host = sys.argv[2] 20 | else: 21 | host = '%s.appspot.com' % app_id 22 | 23 | remote_api_stub.ConfigureRemoteDatastore(app_id, '/remote_api', auth_func, host) 24 | 25 | code.interact('App Engine interactive console for %s' % (app_id,), None, locals()) -------------------------------------------------------------------------------- /avatar.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from google.appengine.ext import webapp 5 | from google.appengine.api import memcache 6 | from google.appengine.ext import db 7 | from google.appengine.ext.webapp import util 8 | 9 | from v2ex.babel import Avatar 10 | 11 | from v2ex.babel.security import * 12 | from v2ex.babel.da import * 13 | 14 | class AvatarHandler(webapp.RequestHandler): 15 | def get(self, member_num, size): 16 | avatar = GetKindByName('Avatar', 'avatar_' + str(member_num) + '_' + str(size)) 17 | if avatar: 18 | self.response.headers['Content-Type'] = "image/png" 19 | self.response.headers['Cache-Control'] = "max-age=172800, public, must-revalidate" 20 | self.response.headers['Expires'] = "Sun, 25 Apr 2011 20:00:00 GMT" 21 | self.response.out.write(avatar.content) 22 | else: 23 | self.redirect('/static/img/avatar_' + str(size) + '.png') 24 | 25 | class NodeAvatarHandler(webapp.RequestHandler): 26 | def get(self, node_num, size): 27 | avatar = GetKindByName('Avatar', 'node_' + str(node_num) + '_' + str(size)) 28 | if avatar: 
29 | self.response.headers['Content-Type'] = "image/png" 30 | self.response.headers['Cache-Control'] = "max-age=172800, public, must-revalidate" 31 | self.response.headers['Expires'] = "Sun, 25 Apr 2011 20:00:00 GMT" 32 | self.response.out.write(avatar.content) 33 | else: 34 | self.error(404) 35 | 36 | def main(): 37 | application = webapp.WSGIApplication([ 38 | ('/avatar/([0-9]+)/(large|normal|mini)', AvatarHandler), 39 | ('/navatar/([0-9]+)/(large|normal|mini)', NodeAvatarHandler) 40 | ], 41 | debug=True) 42 | util.run_wsgi_app(application) 43 | 44 | 45 | if __name__ == '__main__': 46 | main() 47 | -------------------------------------------------------------------------------- /config.py.example: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | mobileme_enabled = False 4 | mobileme_username = '' 5 | mobileme_password = '' 6 | 7 | if os.environ['SERVER_SOFTWARE'] == 'Development/1.0': 8 | twitter_consumer_key = '' 9 | twitter_consumer_secret = '' 10 | else: 11 | twitter_consumer_key = '' 12 | twitter_consumer_secret = '' 13 | 14 | fts_enabled = False 15 | fts_server = '' 16 | fts_username = '' 17 | fts_password = '' 18 | 19 | # Change these before deploying; you can register for keys at http://www.google.com/recaptcha 20 | recaptcha_public_key = 'your recaptcha_public_key' 21 | recaptcha_private_key = 'your recaptcha_private_key' 22 | 23 | daydream_secret = '' 24 | 25 | site_key = '' -------------------------------------------------------------------------------- /css.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | import os 5 | import datetime 6 | 7 | from google.appengine.ext import webapp 8 | from google.appengine.api import memcache 9 | from google.appengine.ext import db 10 | from google.appengine.ext.webapp import util 11 | from google.appengine.ext.webapp import template 12 | 13 | from v2ex.babel import SYSTEM_VERSION 14 | 
15 | template.register_template_library('v2ex.templatetags.filters') 16 | 17 | class CSSHandler(webapp.RequestHandler): 18 | def get(self, theme): 19 | template_values = {} 20 | themes = os.listdir(os.path.join(os.path.dirname(__file__), 'tpl', 'themes')) 21 | if theme in themes: 22 | path = os.path.join(os.path.dirname(__file__), 'tpl', 'themes', theme, 'style.css') 23 | else: 24 | path = os.path.join(os.path.dirname(__file__), 'tpl', 'themes', 'default', 'style.css') 25 | output = template.render(path, template_values) 26 | expires_date = datetime.datetime.utcnow() + datetime.timedelta(days=7) 27 | expires_str = expires_date.strftime("%d %b %Y %H:%M:%S GMT") 28 | self.response.headers.add_header("Expires", expires_str) 29 | self.response.headers['Cache-Control'] = 'max-age=120, must-revalidate' 30 | self.response.headers['Content-type'] = 'text/css;charset=UTF-8' 31 | self.response.out.write(output) 32 | 33 | def main(): 34 | application = webapp.WSGIApplication([ 35 | ('/css/([a-zA-Z0-9]+).css', CSSHandler) 36 | ], 37 | debug=True) 38 | util.run_wsgi_app(application) 39 | 40 | 41 | if __name__ == '__main__': 42 | main() -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | import os 5 | import re 6 | import time 7 | import datetime 8 | import hashlib 9 | import urllib 10 | import string 11 | import random 12 | 13 | from google.appengine.ext import webapp 14 | from google.appengine.api import memcache 15 | from google.appengine.api import urlfetch 16 | from google.appengine.ext import db 17 | from google.appengine.ext.webapp import util 18 | from google.appengine.ext.webapp import template 19 | 20 | from v2ex.babel import Member 21 | from v2ex.babel import Counter 22 | from v2ex.babel import Section 23 | from v2ex.babel import Node 24 | from v2ex.babel import Topic 25 | from v2ex.babel import 
Reply 26 | 27 | from v2ex.babel import SYSTEM_VERSION 28 | 29 | from v2ex.babel.security import * 30 | from v2ex.babel.ua import * 31 | from v2ex.babel.da import * 32 | from v2ex.babel.ext.cookies import Cookies 33 | from v2ex.babel.ext.sessions import Session 34 | 35 | from django.utils import simplejson as json 36 | 37 | from mapreduce import operation as op 38 | 39 | def tidy_node(entity): 40 | # Recalculate exact topics counter 41 | q = db.GqlQuery("SELECT __key__ FROM Topic WHERE node_num = :1", entity.num) 42 | entity.topics = q.count() 43 | memcache.set('Node_' + str(entity.num), entity, 86400) 44 | memcache.set('Node::' + entity.name, entity, 86400) 45 | yield op.db.Put(entity) 46 | 47 | def tidy_topic(entity): 48 | # Recalculate exact replies counter 49 | q = db.GqlQuery("SELECT __key__ FROM Reply WHERE topic_num = :1", entity.num) 50 | entity.replies = q.count() 51 | # Ensure member field is correctly set 52 | q2 = db.GqlQuery("SELECT * FROM Member WHERE num = :1", entity.member_num) 53 | if q2.count() == 1: 54 | entity.member = q2[0] 55 | memcache.set('Topic_' + str(entity.num), entity, 86400) 56 | yield op.db.Put(entity) 57 | 58 | def tidy_reply(entity): 59 | # Ensure member field is correctly set 60 | q = db.GqlQuery("SELECT * FROM Member WHERE num = :1", entity.member_num) 61 | if q.count() == 1: 62 | entity.member = q[0] 63 | # Ensure topic field is correctly set 64 | q2 = db.GqlQuery("SELECT * FROM Topic WHERE num = :1", entity.topic_num) 65 | if q2.count() == 1: 66 | entity.topic = q2[0] 67 | yield op.db.Put(entity) -------------------------------------------------------------------------------- /docs/zh-Hans/Installation.md: -------------------------------------------------------------------------------- 1 | # Project Babel 2 安装文档 2 | 3 | [Project Babel 2](http://github.com/livid/v2ex) 是一个用 [Python](http://www.python.org) 语言写成的运行在 [Google App Engine](http://code.google.com/appengine) 云计算环境中的社区软件,本文详细描述最新版本的 Project Babel 2 的安装过程。 4 | 5 | ## 在 
Google App Engine 注册你的应用 6 | 7 | Project Babel 2 需要运行在 Google App Engine 的云计算环境中,因此你首先需要在 [Google App Engine](http://code.google.com/appengine) 网站注册自己的 Application ID。 8 | 9 | 第一次注册时会需要通过 Google 的手机验证,请填入你的手机号码并加入国家代码即可,比如: 10 | 11 | +8613901012345 12 | 13 | 通过手机验证之后,即可开始注册自己的 Application ID。Application ID 即网址中 .appspot.com 前面的那串字母及数字,比如在下面的例子中,Application ID 即是 v2ex: 14 | 15 | v2ex.appspot.com 16 | 17 | ## 使用 git 获取最新源代码 18 | 19 | 请首先确保系统上安装有 git,Mac OS X 用户可以通过 [MacPorts](http://www.macports.org/) 获得 git: 20 | 21 | sudo port install git-core 22 | 23 | 安装 git 之后,运行以下指令获得最新版本的 Project Babel 2 源代码: 24 | 25 | git clone git://github.com/livid/v2ex.git v2ex 26 | 27 | 之后你需要将获得的那个目录更改为自己的 Application ID。然后将其中的 app.yaml.example 复制为 app.yaml,将其中的 application: 后面的字符串同样更改为自己的 Application ID。 28 | 29 | 之后,你需要将 config.py.example 复制为 config.py 并做一些必要的修改。比如如果你需要用到 Project Babel 2 内置的 Twitter 客户端,那么你就需要在 config.py 中填入你在 [Twitter](http://twitter.com) 网站上申请的 OAuth Consumer Key 和 Secret。 30 | 31 | 为了防止恶意注册,Project Babel 2 还使用了 [reCAPTCHA](http://www.google.com/recaptcha),因此你同样需要在 config.py 填入你自己的 reCAPTCHA 信息。 32 | 33 | ## 使用 Google App Engine Launcher 进行部署 34 | 35 | [Google App Engine Launcher](http://code.google.com/appengine/downloads.html) 是 Google 官方的 App Engine 部署工具,可以非常方便的用于上传和更新自己的 Project Babel 2。该工具需要系统安装有 Python 运行环境,如果你的系统里还没有 Python,Windows 用户建议安装 [ActivePython](http://www.activestate.com/activepython)。 36 | 37 | 安装好 Google App Engine Launcher 后,选择 Add Existing Application,然后选择之前通过 git 获得的那个目录,然后点击蓝色的 Deploy 按钮,即可完成部署。 38 | 39 | 第一次部署结束后,将需要等待 Google 完成数据库索引,之后网站才可访问。 40 | 41 | ## 更多资源 42 | 43 | 如果你在使用 Project Babel 2 的过程中遇到任何问题,欢迎到官方讨论区探讨: 44 | 45 | [http://v2ex.appspot.com/go/babel](http://v2ex.appspot.com/go/babel) -------------------------------------------------------------------------------- /html5lib/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | HTML parsing library based on the WHATWG "HTML5" 3 | 
specification. The parser is designed to be compatible with existing 4 | HTML found in the wild and implements well-defined error recovery that 5 | is largely compatible with modern desktop web browsers. 6 | 7 | Example usage: 8 | 9 | import html5lib 10 | f = open("my_document.html") 11 | tree = html5lib.parse(f) 12 | """ 13 | __version__ = "0.95-dev" 14 | from html5parser import HTMLParser, parse, parseFragment 15 | from treebuilders import getTreeBuilder 16 | from treewalkers import getTreeWalker 17 | from serializer import serialize 18 | -------------------------------------------------------------------------------- /html5lib/filters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livid/v2ex-gae/32be3a77d535e7c9df85a333e01ab8834d0e8581/html5lib/filters/__init__.py -------------------------------------------------------------------------------- /html5lib/filters/_base.py: -------------------------------------------------------------------------------- 1 | 2 | class Filter(object): 3 | def __init__(self, source): 4 | self.source = source 5 | 6 | def __iter__(self): 7 | return iter(self.source) 8 | 9 | def __getattr__(self, name): 10 | return getattr(self.source, name) 11 | -------------------------------------------------------------------------------- /html5lib/filters/sanitizer.py: -------------------------------------------------------------------------------- 1 | import _base 2 | from html5lib.sanitizer import HTMLSanitizerMixin 3 | 4 | class Filter(_base.Filter, HTMLSanitizerMixin): 5 | def __iter__(self): 6 | for token in _base.Filter.__iter__(self): 7 | token = self.sanitize_token(token) 8 | if token: yield token 9 | -------------------------------------------------------------------------------- /html5lib/filters/whitespace.py: -------------------------------------------------------------------------------- 1 | try: 2 | frozenset 3 | except NameError: 4 | # Import from the 
sets module for python 2.3 5 | from sets import ImmutableSet as frozenset 6 | 7 | import re 8 | 9 | import _base 10 | from html5lib.constants import rcdataElements, spaceCharacters 11 | spaceCharacters = u"".join(spaceCharacters) 12 | 13 | SPACES_REGEX = re.compile(u"[%s]+" % spaceCharacters) 14 | 15 | class Filter(_base.Filter): 16 | 17 | spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) 18 | 19 | def __iter__(self): 20 | preserve = 0 21 | for token in _base.Filter.__iter__(self): 22 | type = token["type"] 23 | if type == "StartTag" \ 24 | and (preserve or token["name"] in self.spacePreserveElements): 25 | preserve += 1 26 | 27 | elif type == "EndTag" and preserve: 28 | preserve -= 1 29 | 30 | elif not preserve and type == "SpaceCharacters" and token["data"]: 31 | # Test on token["data"] above to not introduce spaces where there were not 32 | token["data"] = u" " 33 | 34 | elif not preserve and type == "Characters": 35 | token["data"] = collapse_spaces(token["data"]) 36 | 37 | yield token 38 | 39 | def collapse_spaces(text): 40 | return SPACES_REGEX.sub(' ', text) 41 | 42 | -------------------------------------------------------------------------------- /html5lib/serializer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from html5lib import treewalkers 3 | 4 | from htmlserializer import HTMLSerializer 5 | from xhtmlserializer import XHTMLSerializer 6 | 7 | def serialize(input, tree="simpletree", format="html", encoding=None, 8 | **serializer_opts): 9 | # XXX: Should we cache this? 
10 | walker = treewalkers.getTreeWalker(tree) 11 | if format == "html": 12 | s = HTMLSerializer(**serializer_opts) 13 | elif format == "xhtml": 14 | s = XHTMLSerializer(**serializer_opts) 15 | else: 16 | raise ValueError, "type must be either html or xhtml" 17 | return s.render(walker(input), encoding) 18 | -------------------------------------------------------------------------------- /html5lib/serializer/xhtmlserializer.py: -------------------------------------------------------------------------------- 1 | from htmlserializer import HTMLSerializer 2 | 3 | class XHTMLSerializer(HTMLSerializer): 4 | quote_attr_values = True 5 | minimize_boolean_attributes = False 6 | use_trailing_solidus = True 7 | escape_lt_in_attrs = True 8 | omit_optional_tags = False 9 | escape_rcdata = True 10 | -------------------------------------------------------------------------------- /html5lib/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | parent_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], "..")) 5 | 6 | if not parent_path in sys.path: 7 | sys.path.insert(0, parent_path) 8 | del parent_path 9 | 10 | from runtests import buildTestSuite 11 | 12 | import support 13 | -------------------------------------------------------------------------------- /html5lib/tests/mockParser.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | if __name__ == '__main__': 5 | #Allow us to import from the src directory 6 | os.chdir(os.path.split(os.path.abspath(__file__))[0]) 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src"))) 8 | 9 | from tokenizer import HTMLTokenizer 10 | 11 | class HTMLParser(object): 12 | """ Fake parser to test tokenizer output """ 13 | def parse(self, stream, output=True): 14 | tokenizer = HTMLTokenizer(stream) 15 | for token in tokenizer: 16 | if output: 17 | print token 18 | 19 | if 
__name__ == "__main__": 20 | x = HTMLParser() 21 | if len(sys.argv) > 1: 22 | if len(sys.argv) > 2: 23 | import hotshot, hotshot.stats 24 | prof = hotshot.Profile('stats.prof') 25 | prof.runcall(x.parse, sys.argv[1], False) 26 | prof.close() 27 | stats = hotshot.stats.load('stats.prof') 28 | stats.strip_dirs() 29 | stats.sort_stats('time') 30 | stats.print_stats() 31 | else: 32 | x.parse(sys.argv[1]) 33 | else: 34 | print """Usage: python mockParser.py filename [stats] 35 | If stats is specified the hotshots profiler will run and output the 36 | stats instead. 37 | """ 38 | -------------------------------------------------------------------------------- /html5lib/tests/runparsertests.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import glob 4 | import unittest 5 | 6 | #Allow us to import the parent module 7 | os.chdir(os.path.split(os.path.abspath(__file__))[0]) 8 | sys.path.insert(0, os.path.abspath(os.curdir)) 9 | sys.path.insert(0, os.path.abspath(os.pardir)) 10 | sys.path.insert(0, os.path.join(os.path.abspath(os.pardir), "src")) 11 | 12 | def buildTestSuite(): 13 | suite = unittest.TestSuite() 14 | for testcase in glob.glob('test_*.py'): 15 | if testcase in ("test_tokenizer.py", "test_parser.py", "test_parser2.py"): 16 | module = os.path.splitext(testcase)[0] 17 | suite.addTest(__import__(module).buildTestSuite()) 18 | return suite 19 | 20 | def main(): 21 | results = unittest.TextTestRunner().run(buildTestSuite()) 22 | return results 23 | 24 | if __name__ == "__main__": 25 | results = main() 26 | if not results.wasSuccessful(): 27 | sys.exit(1) 28 | -------------------------------------------------------------------------------- /html5lib/tests/runtests.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import glob 4 | import unittest 5 | 6 | def buildTestSuite(): 7 | suite = unittest.TestSuite() 8 | for testcase in 
glob.glob('test_*.py'): 9 | module = os.path.splitext(testcase)[0] 10 | suite.addTest(__import__(module).buildTestSuite()) 11 | return suite 12 | 13 | def main(): 14 | results = unittest.TextTestRunner().run(buildTestSuite()) 15 | return results 16 | 17 | if __name__ == "__main__": 18 | results = main() 19 | if not results.wasSuccessful(): 20 | sys.exit(1) 21 | -------------------------------------------------------------------------------- /html5lib/tests/test_encoding.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from support import html5lib_test_files, TestData, test_dir 4 | 5 | from html5lib import HTMLParser, inputstream 6 | 7 | import re, unittest 8 | 9 | class Html5EncodingTestCase(unittest.TestCase): 10 | def test_codec_name(self): 11 | self.assertEquals(inputstream.codecName("utf-8"), "utf-8") 12 | self.assertEquals(inputstream.codecName("utf8"), "utf-8") 13 | self.assertEquals(inputstream.codecName(" utf8 "), "utf-8") 14 | self.assertEquals(inputstream.codecName("ISO_8859--1"), "windows-1252") 15 | 16 | def buildTestSuite(): 17 | for filename in html5lib_test_files("encoding"): 18 | test_name = os.path.basename(filename).replace('.dat',''). 
\ 19 | replace('-','') 20 | tests = TestData(filename, "data") 21 | for idx, test in enumerate(tests): 22 | def encodingTest(self, data=test['data'], 23 | encoding=test['encoding']): 24 | p = HTMLParser() 25 | t = p.parse(data, useChardet=False) 26 | 27 | errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n"% 28 | (data, repr(encoding.lower()), 29 | repr(p.tokenizer.stream.charEncoding))) 30 | self.assertEquals(encoding.lower(), 31 | p.tokenizer.stream.charEncoding[0], 32 | errorMessage) 33 | setattr(Html5EncodingTestCase, 'test_%s_%d' % (test_name, idx+1), 34 | encodingTest) 35 | 36 | try: 37 | import chardet 38 | def test_chardet(self): 39 | data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt")).read() 40 | encoding = inputstream.HTMLInputStream(data).charEncoding 41 | assert encoding[0].lower() == "big5" 42 | setattr(Html5EncodingTestCase, 'test_chardet', test_chardet) 43 | except ImportError: 44 | print "chardet not found, skipping chardet tests" 45 | 46 | 47 | return unittest.defaultTestLoader.loadTestsFromName(__name__) 48 | 49 | def main(): 50 | buildTestSuite() 51 | unittest.main() 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /html5lib/tests/test_parser2.py: -------------------------------------------------------------------------------- 1 | import support 2 | from html5lib import html5parser 3 | from html5lib.constants import namespaces 4 | from html5lib.treebuilders import dom 5 | 6 | import unittest 7 | 8 | # tests that aren't autogenerated from text files 9 | class MoreParserTests(unittest.TestCase): 10 | 11 | def test_assertDoctypeCloneable(self): 12 | parser = html5parser.HTMLParser(tree=dom.TreeBuilder) 13 | doc = parser.parse('') 14 | self.assert_(doc.cloneNode(True)) 15 | 16 | def test_line_counter(self): 17 | # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0 18 | parser = 
html5parser.HTMLParser(tree=dom.TreeBuilder) 19 | parser.parse("
\nx\n>\n
") 20 | 21 | def test_namespace_html_elements_0(self): 22 | parser = html5parser.HTMLParser(namespaceHTMLElements=True) 23 | doc = parser.parse("") 24 | self.assert_(doc.childNodes[0].namespace == namespaces["html"]) 25 | 26 | def test_namespace_html_elements_1(self): 27 | parser = html5parser.HTMLParser(namespaceHTMLElements=False) 28 | doc = parser.parse("") 29 | self.assert_(doc.childNodes[0].namespace == None) 30 | 31 | def buildTestSuite(): 32 | return unittest.defaultTestLoader.loadTestsFromName(__name__) 33 | 34 | def main(): 35 | buildTestSuite() 36 | unittest.main() 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /html5lib/tests/testdata/encoding/test-yahoo-jp.dat: -------------------------------------------------------------------------------- 1 | #data 2 | 3 | 4 | 5 | 6 | Yahoo! JAPAN 7 | 8 |
20 | #errors 21 | #document 22 | | 23 | | 24 | | 25 | | 26 | |
27 | | 28 | |