├── .project ├── simsearch ├── experiments │ ├── __init__.py │ ├── evaluate_paths.py │ ├── check_connectivity.py │ ├── simulate_accessibility.py │ └── simulate_search.py ├── static │ ├── img │ │ ├── magnifier.png │ │ ├── ajax-loader.gif │ │ ├── lookup_back_hover.png │ │ ├── lookup_back_static.png │ │ ├── lookup_reset_hover.png │ │ ├── lookup_forward_hover.png │ │ ├── lookup_reset_static.png │ │ └── lookup_forward_static.png │ ├── css │ │ ├── blueprint │ │ │ ├── src │ │ │ │ ├── grid.png │ │ │ │ ├── reset.css │ │ │ │ ├── print.css │ │ │ │ ├── forms.css │ │ │ │ ├── ie.css │ │ │ │ ├── typography.css │ │ │ │ └── grid.css │ │ │ ├── plugins │ │ │ │ ├── buttons │ │ │ │ │ ├── icons │ │ │ │ │ │ ├── key.png │ │ │ │ │ │ ├── cross.png │ │ │ │ │ │ └── tick.png │ │ │ │ │ ├── readme.txt │ │ │ │ │ └── screen.css │ │ │ │ ├── link-icons │ │ │ │ │ ├── icons │ │ │ │ │ │ ├── doc.png │ │ │ │ │ │ ├── im.png │ │ │ │ │ │ ├── pdf.png │ │ │ │ │ │ ├── xls.png │ │ │ │ │ │ ├── email.png │ │ │ │ │ │ ├── feed.png │ │ │ │ │ │ ├── external.png │ │ │ │ │ │ └── visited.png │ │ │ │ │ ├── readme.txt │ │ │ │ │ └── screen.css │ │ │ │ ├── rtl │ │ │ │ │ ├── readme.txt │ │ │ │ │ └── screen.css │ │ │ │ └── fancy-type │ │ │ │ │ ├── readme.txt │ │ │ │ │ └── screen.css │ │ │ ├── print.css │ │ │ ├── ie.css │ │ │ └── screen.css │ │ ├── static.css │ │ ├── lookup.css │ │ └── common.css │ └── js │ │ ├── jquery.sizes.min.js │ │ ├── search.js │ │ └── raphael-min.js ├── data │ └── jp_char_corpus_counts.gz ├── templates │ ├── 404.html │ ├── 500.html │ ├── static │ │ ├── base.html │ │ ├── feedback.html │ │ ├── about.html │ │ └── help.html │ ├── search │ │ ├── index.html │ │ └── display.html │ ├── base.html │ └── translate │ │ └── kanji.html ├── SConscript ├── views.py ├── urls.py ├── context.py ├── settings.py ├── heap_cache.py ├── stroke.pyx ├── __init__.py └── models.py ├── MANIFEST.in ├── simsearch.py ├── .gitignore ├── requirements.txt ├── .hgignore ├── Makefile ├── setup.py ├── README.rst └── SConstruct /.project: -------------------------------------------------------------------------------- 1 | name = simsearch 2 | -------------------------------------------------------------------------------- /simsearch/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft simsearch/data 2 | -------------------------------------------------------------------------------- /simsearch/static/img/magnifier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/img/magnifier.png -------------------------------------------------------------------------------- /simsearch/static/img/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/img/ajax-loader.gif -------------------------------------------------------------------------------- /simsearch/data/jp_char_corpus_counts.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/data/jp_char_corpus_counts.gz -------------------------------------------------------------------------------- /simsearch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | if __name__ == '__main__': 4 | from simsearch import app 5 | app.run(debug=True) 6 | 7 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/src/grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/src/grid.png -------------------------------------------------------------------------------- /simsearch/static/img/lookup_back_hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/img/lookup_back_hover.png -------------------------------------------------------------------------------- /simsearch/static/img/lookup_back_static.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/img/lookup_back_static.png -------------------------------------------------------------------------------- /simsearch/static/img/lookup_reset_hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/img/lookup_reset_hover.png -------------------------------------------------------------------------------- /simsearch/static/img/lookup_forward_hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/img/lookup_forward_hover.png -------------------------------------------------------------------------------- /simsearch/static/img/lookup_reset_static.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/img/lookup_reset_static.png -------------------------------------------------------------------------------- /simsearch/static/img/lookup_forward_static.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/img/lookup_forward_static.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | build/ 3 | simsearch.egg-info/ 4 | simsearch/stroke.c 5 | simsearch/stroke.so 6 | *.pyo 7 | *.pyc 8 | .simsearch-installed 9 | .models-created 10 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/buttons/icons/key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/buttons/icons/key.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/buttons/icons/cross.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/buttons/icons/cross.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/buttons/icons/tick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/buttons/icons/tick.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/icons/doc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/link-icons/icons/doc.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/icons/im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/link-icons/icons/im.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/icons/pdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/link-icons/icons/pdf.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/icons/xls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/link-icons/icons/xls.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/icons/email.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/link-icons/icons/email.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/icons/feed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/link-icons/icons/feed.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/icons/external.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/link-icons/icons/external.png -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/icons/visited.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsyencken/simsearch/HEAD/simsearch/static/css/blueprint/plugins/link-icons/icons/visited.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cjktools>=1.5.0 2 | cjktools-data>=0.2.1-2010-07-29 3 | consoleLog>=0.2.4 4 | simplestats>=0.2.0 5 | pymongo 6 | mongoengine>=0.3 7 | pyyaml 8 | nltk 9 | mercurial 10 | flask 11 | simplejson 12 | cython 13 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | syntax: glob 2 | *.pyc 3 | *.pyo 4 | *.swp 5 | *.orig 6 | *-env 7 | local_settings.py 8 | simsearch/stroke.{so,os,c} 9 | *.log 10 | distribute*.tar.gz 11 | .scon* 12 | .DS_Store 13 | *.paths 14 | *.csv 15 | tags 16 | build 17 | dist 18 | *.egg-info 19 | -------------------------------------------------------------------------------- /simsearch/templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends "static/base.html" %} 2 | 3 | {% block inner_content %} 4 |

Page not found

5 |

Hi there! I'm not sure which page you were looking for, but we don't seem 6 | to have it. If you're lost, search page 7 | is always a good place to start.

8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/rtl/readme.txt: -------------------------------------------------------------------------------- 1 | RTL 2 | * Mirrors Blueprint, so it can be used with Right-to-Left languages. 3 | 4 | By Ran Yaniv Hartstein, ranh.co.il 5 | 6 | Usage 7 | ---------------------------------------------------------------- 8 | 9 | 1) Add this line to your HTML: 10 | 11 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/fancy-type/readme.txt: -------------------------------------------------------------------------------- 1 | Fancy Type 2 | 3 | * Gives you classes to use if you'd like some 4 | extra fancy typography. 5 | 6 | Credits and instructions are specified above each class 7 | in the fancy-type.css file in this directory. 8 | 9 | 10 | Usage 11 | ---------------------------------------------------------------- 12 | 13 | 1) Add this plugin to lib/settings.yml. 14 | See compress.rb for instructions. 15 | -------------------------------------------------------------------------------- /simsearch/templates/500.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | An error occurred 5 | 6 | 7 |

Sorry! An error occurred

8 |

Terribly sorry, the site had an internal error of some sort. If this keeps happening, please file a bug on our bitbucket page.

9 | 10 | 11 | -------------------------------------------------------------------------------- /simsearch/templates/static/base.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block headers %} 4 | 6 | {% block sub_headers %}{% endblock %} 7 | {% endblock %} 8 | 9 | {% block content %} 10 |
11 |
12 |
13 | {% block inner_content %} 14 | {% endblock %} 15 |
16 |
17 | {% endblock %} 18 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/readme.txt: -------------------------------------------------------------------------------- 1 | Link Icons 2 | * Icons for links based on protocol or file type. 3 | 4 | This is not supported in IE versions < 7. 5 | 6 | 7 | Credits 8 | ---------------------------------------------------------------- 9 | 10 | * Marc Morgan 11 | * Olav Bjorkoy [bjorkoy.com] 12 | 13 | 14 | Usage 15 | ---------------------------------------------------------------- 16 | 17 | 1) Add this line to your HTML: 18 | 19 | -------------------------------------------------------------------------------- /simsearch/static/css/static.css: -------------------------------------------------------------------------------- 1 | /* 2 | * static.css 3 | */ 4 | 5 | .container { width: 550px; } 6 | #pivot { font-size: 200px; vertical-align: middle; text-align: right; } 7 | .readings > tbody > tr > th { text-align: right; vertical-align: top; } 8 | td, th, p, h1, h2, h3 { font: 300 "Helvetica Neue", Helvetica, "Arial Unicode MS", 9 | Arial, sans-serif; } 10 | p, li, td, th { font-size: 1.2em } 11 | p { margin-left: 2em; } 12 | h3 { font-size: 1.3em; margin-bottom: 0.3em; } 13 | h2 { font-size: 2em; margin-bottom: 0.2em; } 14 | 15 | a { text-decoration: none; } 16 | a:hover { text-decoration: underline; } 17 | -------------------------------------------------------------------------------- /simsearch/templates/static/feedback.html: -------------------------------------------------------------------------------- 1 | {% extends "static/base.html" %} 2 | 3 | {% block inner_content %} 4 |

Feedback

5 | 6 |

Interfaces like this can only get better with your help. If you have 7 | difficulty using the interface, feature suggestions, or any other kind of 8 | feedback, please send an email to lars@yencken.org. 9 | 10 |

Also note that the full source code to this site is available on Bitbucket, allowing you to host your own version, or modify it as you like.

11 | 12 | {% endblock %} 13 | -------------------------------------------------------------------------------- /simsearch/SConscript: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # SConscript 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 27-08-2010. 7 | # Copyright 2010 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | Scons build file for structure extensions. 12 | """ 13 | 14 | #----------------------------------------------------------------------------# 15 | 16 | Import('env') 17 | 18 | #----------------------------------------------------------------------------# 19 | 20 | stroke = env.Cython('stroke.c', 'stroke.pyx') 21 | env.SharedLibrary('stroke', stroke) 22 | 23 | #----------------------------------------------------------------------------# 24 | -------------------------------------------------------------------------------- /simsearch/views.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # views.py 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 24-08-2010. 7 | # Copyright 2010 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | """ 12 | 13 | from django.views.static import serve 14 | from django.conf import settings 15 | 16 | def media(request): 17 | """ 18 | Use this to serve static media. Since some of the media may be files 19 | which were uploaded, we want to password protect everything. 20 | """ 21 | return serve(request, request.path[len(settings.MEDIA_URL):], 22 | document_root=settings.MEDIA_ROOT) 23 | 24 | # vim: ts=4 sw=4 sts=4 et tw=78: 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | serve: .simsearch-installed .models-created 3 | env/bin/python setup.py develop 4 | env/bin/python simsearch.py 5 | 6 | env: requirements.txt 7 | test -d env || virtualenv -p python2.7 env 8 | env/bin/pip install -r requirements.txt 9 | touch env 10 | 11 | env/bin/cython: env 12 | 13 | .simsearch-installed: simsearch/stroke.c 14 | env/bin/python setup.py develop 15 | touch $@ 16 | 17 | .models-created: .simsearch-installed 18 | env/bin/python -m simsearch.models 19 | touch $@ 20 | 21 | simsearch/stroke.c: simsearch/stroke.pyx env/bin/cython 22 | env/bin/cython $< 23 | 24 | clean: 25 | rm -rf env build .simsearch-installed .models-created simsearch.egg-info 26 | -------------------------------------------------------------------------------- /simsearch/urls.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # urls.py 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 24-08-2010. 7 | # Copyright 2010 Lars Yencken. All rights reserved. 8 | # 9 | 10 | from django.conf.urls.defaults import * 11 | from django.conf import settings 12 | 13 | _patterns = ['', 14 | (r'^translate/', include('simsearch.translate.urls')), 15 | (r'', include('simsearch.static.urls')), 16 | (r'', include('simsearch.search.urls')), 17 | ] 18 | 19 | if settings.DEBUG: 20 | _patterns[1:1] = [ 21 | url(r'^media/', 'simsearch.views.media', name='media'), 22 | ] 23 | 24 | urlpatterns = patterns(*_patterns) 25 | 26 | # vim: ts=4 sw=4 sts=4 et tw=78: 27 | -------------------------------------------------------------------------------- /simsearch/static/css/lookup.css: -------------------------------------------------------------------------------- 1 | /* 2 | * lookup.css 3 | */ 4 | 5 | body { 6 | background: #f3f3f3; 7 | } 8 | 9 | a {text-decoration: none; } 10 | a img {border: none;} 11 | 12 | p { 13 | text-align: center; 14 | font: 300 1.2em "Helvetica Neue", Helvetica, "Arial Unicode MS", Arial, sans-serif; 15 | } 16 | 17 | #seedLookup { 18 | width:190px; 19 | height:50px; 20 | position:absolute; margin:auto; top:0; right:0; left:0; bottom:0; 21 | text-align: center; 22 | } 23 | 24 | #seedInput { 25 | border:1px solid #505050; 26 | font-size:20px; 27 | width: 120px; 28 | margin: 0px; 29 | vertical-align: top; 30 | } 31 | 32 | #seedLookup > img { 33 | width: 29px; 34 | height: 29px; 35 | margin: 0px; 36 | } 37 | -------------------------------------------------------------------------------- /simsearch/templates/search/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block headers %} 4 | 5 | {% endblock %} 6 | 7 | {% block body_tags %}onload="document.getElementById('seedInput').focus()"{% endblock %} 8 | 9 | {% block content %} 10 |
11 |
12 | 13 | 14 |
15 |
16 |

17 | {% if error %} 18 | {{error}} (help) 19 | {% else %} 20 | Enter the kanji you want to find, or one that looks similar. 21 | {% endif %} 22 |

23 |
24 | {% endblock %} 25 | -------------------------------------------------------------------------------- /simsearch/context.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # context.py 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 04-09-2010. 7 | # Copyright 2010 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | Context processors for similarity search. 12 | """ 13 | 14 | import os 15 | 16 | from mercurial import ui, hg, node 17 | 18 | def mercurial_revision(): 19 | project_base = os.path.join(settings.PROJECT_ROOT, '..') 20 | repo = hg.repository(ui.ui(), project_base) 21 | fctx = repo.filectx(project_base, 'tip') 22 | 23 | return {'revision': { 24 | 'short': node.short(fctx.node()), 25 | 'number': fctx.rev(), 26 | }} 27 | 28 | def site_settings(): 29 | return {'settings': settings} 30 | 31 | # vim: ts=4 sw=4 sts=4 et tw=78: 32 | -------------------------------------------------------------------------------- /simsearch/static/css/common.css: -------------------------------------------------------------------------------- 1 | /* 2 | * common.css 3 | */ 4 | 5 | 6 | #copy { 7 | bottom: 0; 8 | font: 300 1.2em "Helvetica Neue", Helvetica, "Arial Unicode MS", Arial, sans-serif; 9 | position: absolute; 10 | right: 1em; 11 | text-align: right; 12 | margin-top: 1em; 13 | margin-bottom: 1em; 14 | color: #505050; 15 | /* 16 | border: 1px solid #dddddd; 17 | padding: 3px; 18 | */ 19 | } 20 | #copy a { 21 | color: #303090; 22 | } 23 | #copy a:hover { 24 | text-decoration: underline; 25 | } 26 | 27 | a, a:hover { color: #303090; } 28 | a:hover { 29 | text-decoration: underline; 30 | } 31 | 32 | #nav { 33 | bottom: top; 34 | font: 300 1.2em "Helvetica Neue", Helvetica, "Arial Unicode MS", Arial, sans-serif; 35 | position: absolute; 36 | right: 1em; 37 | margin-top: 1em; 38 | margin-bottom: 1em; 39 | text-align: right; 40 | color: #505050; 41 | } 42 | #nav a { 43 | color: #303090; 44 | } 45 | #nav a:hover { 46 | text-decoration: underline; 47 | } 48 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/buttons/readme.txt: -------------------------------------------------------------------------------- 1 | Buttons 2 | 3 | * Gives you great looking CSS buttons, for both and 25 | 26 | 27 | Change Password 28 | 29 | 30 | 31 | Cancel 32 | 33 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # setup.py 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 2011-10-26. 7 | # Copyright 2011 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | Package information for simsearch. 12 | """ 13 | 14 | from setuptools import setup 15 | from setuptools.extension import Extension 16 | 17 | setup( 18 | name='simsearch', 19 | author='Lars Yencken', 20 | author_email='lars@yencken.org', 21 | version='0.3.0', 22 | description='Similarity search for Japanese kanji.', 23 | url="http://simsearch.gakusha.info/", 24 | license='BSD', 25 | install_requires=[ 26 | 'cjktools>=1.5.0', 27 | 'cjktools-data>=0.2.1-2010-07-29', 28 | 'consoleLog>=0.2.4', 29 | 'simplestats>=0.2.0', 30 | 'pymongo', 31 | 'mongoengine>=0.3', 32 | 'pyyaml', 33 | 'nltk', 34 | 'mercurial', 35 | 'flask', 36 | 'simplejson', 37 | ], 38 | packages=['simsearch'], 39 | ext_modules=[Extension( 40 | 'simsearch.stroke', 41 | sources=['simsearch/stroke.pyx'], 42 | )], 43 | scripts=['simsearch.py'], 44 | zip_safe=False, 45 | ) 46 | 47 | -------------------------------------------------------------------------------- /simsearch/static/js/jquery.sizes.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | * JSizes - JQuery plugin v0.33 3 | * 4 | * Licensed under the revised BSD License. 5 | * Copyright 2008-2010 Bram Stein 6 | * All rights reserved. 7 | */ 8 | (function(b){var a=function(c){return parseInt(c,10)||0};b.each(["min","max"],function(d,c){b.fn[c+"Size"]=function(g){var f,e;if(g){if(g.width!==undefined){this.css(c+"-width",g.width)}if(g.height!==undefined){this.css(c+"-height",g.height)}return this}else{f=this.css(c+"-width");e=this.css(c+"-height");return{width:(c==="max"&&(f===undefined||f==="none"||a(f)===-1)&&Number.MAX_VALUE)||a(f),height:(c==="max"&&(e===undefined||e==="none"||a(e)===-1)&&Number.MAX_VALUE)||a(e)}}}});b.fn.isVisible=function(){return this.is(":visible")};b.each(["border","margin","padding"],function(d,c){b.fn[c]=function(e){if(e){if(e.top!==undefined){this.css(c+"-top"+(c==="border"?"-width":""),e.top)}if(e.bottom!==undefined){this.css(c+"-bottom"+(c==="border"?"-width":""),e.bottom)}if(e.left!==undefined){this.css(c+"-left"+(c==="border"?"-width":""),e.left)}if(e.right!==undefined){this.css(c+"-right"+(c==="border"?"-width":""),e.right)}return this}else{return{top:a(this.css(c+"-top"+(c==="border"?"-width":""))),bottom:a(this.css(c+"-bottom"+(c==="border"?"-width":""))),left:a(this.css(c+"-left"+(c==="border"?"-width":""))),right:a(this.css(c+"-right"+(c==="border"?"-width":"")))}}}})})(jQuery); -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/print.css: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------------- 2 | 3 | 4 | Blueprint CSS Framework 0.9 5 | http://blueprintcss.org 6 | 7 | * Copyright (c) 2007-Present. See LICENSE for more info. 8 | * See README for instructions on how to use Blueprint. 9 | * For credits and origins, see AUTHORS. 10 | * This is a compressed file. See the sources in the 'src' directory. 11 | 12 | ----------------------------------------------------------------------- */ 13 | 14 | /* print.css */ 15 | body {line-height:1.5;font-family:"Helvetica Neue", Arial, Helvetica, sans-serif;color:#000;background:none;font-size:10pt;} 16 | .container {background:none;} 17 | hr {background:#ccc;color:#ccc;width:100%;height:2px;margin:2em 0;padding:0;border:none;} 18 | hr.space {background:#fff;color:#fff;visibility:hidden;} 19 | h1, h2, h3, h4, h5, h6 {font-family:"Helvetica Neue", Arial, "Lucida Grande", sans-serif;} 20 | code {font:.9em "Courier New", Monaco, Courier, monospace;} 21 | a img {border:none;} 22 | p img.top {margin-top:0;} 23 | blockquote {margin:1.5em;padding:1em;font-style:italic;font-size:.9em;} 24 | .small {font-size:.9em;} 25 | .large {font-size:1.1em;} 26 | .quiet {color:#999;} 27 | .hide {display:none;} 28 | a:link, a:visited {background:transparent;font-weight:700;text-decoration:underline;} 29 | a:link:after, a:visited:after {content:" (" attr(href) ")";font-size:90%;} -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/src/reset.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | reset.css 4 | * Resets default browser CSS. 5 | 6 | -------------------------------------------------------------- */ 7 | 8 | html, body, div, span, object, iframe, 9 | h1, h2, h3, h4, h5, h6, p, blockquote, pre, 10 | a, abbr, acronym, address, code, 11 | del, dfn, em, img, q, dl, dt, dd, ol, ul, li, 12 | fieldset, form, label, legend, 13 | table, caption, tbody, tfoot, thead, tr, th, td, 14 | article, aside, dialog, figure, footer, header, 15 | hgroup, nav, section { 16 | margin: 0; 17 | padding: 0; 18 | border: 0; 19 | font-weight: inherit; 20 | font-style: inherit; 21 | font-size: 100%; 22 | font-family: inherit; 23 | vertical-align: baseline; 24 | } 25 | 26 | article, aside, dialog, figure, footer, header, 27 | hgroup, nav, section { 28 | display:block; 29 | } 30 | 31 | body { 32 | line-height: 1.5; 33 | } 34 | 35 | /* Tables still need 'cellspacing="0"' in the markup. */ 36 | table { border-collapse: separate; border-spacing: 0; } 37 | caption, th, td { text-align: left; font-weight: normal; } 38 | table, td, th { vertical-align: middle; } 39 | 40 | /* Remove possible quote marks (") from ,
. */ 41 | blockquote:before, blockquote:after, q:before, q:after { content: ""; } 42 | blockquote, q { quotes: "" ""; } 43 | 44 | /* Remove annoying border on linked images. */ 45 | a img { border: none; } 46 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/link-icons/screen.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | link-icons.css 4 | * Icons for links based on protocol or file type. 5 | 6 | See the Readme file in this folder for additional instructions. 7 | 8 | -------------------------------------------------------------- */ 9 | 10 | /* Use this class if a link gets an icon when it shouldn't. */ 11 | body a.noicon { 12 | background:transparent none !important; 13 | padding:0 !important; 14 | margin:0 !important; 15 | } 16 | 17 | /* Make sure the icons are not cut */ 18 | a[href^="http:"], a[href^="mailto:"], a[href^="http:"]:visited, 19 | a[href$=".pdf"], a[href$=".doc"], a[href$=".xls"], a[href$=".rss"], 20 | a[href$=".rdf"], a[href^="aim:"] { 21 | padding:2px 22px 2px 0; 22 | margin:-2px 0; 23 | background-repeat: no-repeat; 24 | background-position: right center; 25 | } 26 | 27 | /* External links */ 28 | a[href^="http:"] { background-image: url(icons/external.png); } 29 | a[href^="mailto:"] { background-image: url(icons/email.png); } 30 | a[href^="http:"]:visited { background-image: url(icons/visited.png); } 31 | 32 | /* Files */ 33 | a[href$=".pdf"] { background-image: url(icons/pdf.png); } 34 | a[href$=".doc"] { background-image: url(icons/doc.png); } 35 | a[href$=".xls"] { background-image: url(icons/xls.png); } 36 | 37 | /* Misc */ 38 | a[href$=".rss"], 39 | a[href$=".rdf"] { background-image: url(icons/feed.png); } 40 | a[href^="aim:"] { background-image: url(icons/im.png); } 41 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/ie.css: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------------- 2 | 3 | 4 | Blueprint CSS Framework 0.9 5 | http://blueprintcss.org 6 | 7 | * Copyright (c) 2007-Present. See LICENSE for more info. 8 | * See README for instructions on how to use Blueprint. 9 | * For credits and origins, see AUTHORS. 10 | * This is a compressed file. See the sources in the 'src' directory. 11 | 12 | ----------------------------------------------------------------------- */ 13 | 14 | /* ie.css */ 15 | body {text-align:center;} 16 | .container {text-align:left;} 17 | * html .column, * html .span-1, * html .span-2, * html .span-3, * html .span-4, * html .span-5, * html .span-6, * html .span-7, * html .span-8, * html .span-9, * html .span-10, * html .span-11, * html .span-12, * html .span-13, * html .span-14, * html .span-15, * html .span-16, * html .span-17, * html .span-18, * html .span-19, * html .span-20, * html .span-21, * html .span-22, * html .span-23, * html .span-24 {display:inline;overflow-x:hidden;} 18 | * html legend {margin:0px -8px 16px 0;padding:0;} 19 | sup {vertical-align:text-top;} 20 | sub {vertical-align:text-bottom;} 21 | html>body p code {*white-space:normal;} 22 | hr {margin:-8px auto 11px;} 23 | img {-ms-interpolation-mode:bicubic;} 24 | .clearfix, .container {display:inline-block;} 25 | * html .clearfix, * html .container {height:1%;} 26 | fieldset {padding-top:0;} 27 | textarea {overflow:auto;} 28 | input.text, input.title, textarea {background-color:#fff;border:1px solid #bbb;} 29 | input.text:focus, input.title:focus {border-color:#666;} 30 | input.text, input.title, textarea, select {margin:0.5em 0;} 31 | input.checkbox, input.radio {position:relative;top:.25em;} 32 | form.inline div, form.inline p {vertical-align:middle;} 33 | form.inline label {position:relative;top:-0.25em;} 34 | form.inline input.checkbox, form.inline input.radio, form.inline input.button, form.inline button {margin:0.5em 0;} 35 | button, input.button {position:relative;top:0.25em;} -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | SimSearch 2 | ========= 3 | 4 | :Author: Lars Yencken 5 | :Date: 21st Jan 2011 6 | 7 | Overview 8 | -------- 9 | 10 | SimSearch is a dictionary search-by-similarity interface for Japanese kanji, 11 | providing a nice front-end for Kanjidic. It lets you find a kanji you don't 12 | know, using kanji that are visually similar. 13 | 14 | .. image:: http://files.gakusha.info/simsearch/homepage/ss-example-med.png 15 | 16 | If you're viewing this source code, you should be a developer, or someone at 17 | least a little comfortable with Python. 18 | 19 | Developing/running 20 | ------------------ 21 | 22 | This is a quick guide to getting SimSearch up and running locally. 23 | 24 | Dependencies 25 | ~~~~~~~~~~~~ 26 | 27 | SimSearch uses MongoDB as its database backend. If you don't already have it, 28 | install MongoDB first. By default, it will create and use a database called 29 | ``simsearch`` in MongoDB. 30 | 31 | Next, you need Python (2.6/2.7), pip and virtualenv. Then you can install the 32 | necessary packages in an environment for simsearch:: 33 | 34 | $ pip -E ss-env install ./simsearch 35 | 36 | Occasionally a dependency will fail to install cleanly (e.g. NLTK). In that 37 | case, you will need to download a package for it, enter the virtual 38 | environment and install the package from there:: 39 | 40 | $ tar xfz nltk-v2.08b.tgz 41 | $ cd nltk-v2.08b 42 | $ source /path/to/simsearch/ss-env/bin/activate 43 | (ss-env) $ python setup.py install 44 | 45 | Building and running 46 | ~~~~~~~~~~~~~~~~~~~~ 47 | 48 | Once installed, build the database with:: 49 | 50 | $ python -m simsearch.models 51 | Building similarity matrix 52 | Building neighbourhood graph 53 | 54 | You can then run the debug server with the command ``simsearch.py``. The 55 | server will be available at http://localhost:5000/. 56 | 57 | Deploying 58 | --------- 59 | 60 | Please see Flask documentation around deployment. Feel free to email me as 61 | well, if you have any issues. 62 | 63 | -------------------------------------------------------------------------------- /simsearch/experiments/evaluate_paths.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # evaluate_paths.py 5 | # simsearch 6 | # 7 | # Created by Lars Yencken on 03-09-2010. 8 | # Copyright 2010 Lars Yencken. All rights reserved. 9 | # 10 | 11 | """ 12 | A script to generate statistics on a set of query traces (i.e. walks through 13 | the kanji graph generated by simulated search). 14 | """ 15 | 16 | import os, sys, optparse 17 | 18 | from simplestats import basic_stats 19 | 20 | from simulate_search import TraceFile 21 | 22 | def evaluate_paths(input_file, limit=5): 23 | print 'Evaluating paths from "%s"' % os.path.basename(input_file) 24 | traces = TraceFile.load(input_file) 25 | 26 | path_lengths = [] 27 | successes = [] 28 | for (query, target, path) in traces: 29 | if path and path[-1] == target: 30 | successes.append(path) 31 | path_lengths.append(len(path) - 1) 32 | else: 33 | path_lengths.append(limit) 34 | 35 | print u'Success rate: %d/%d (%.02f%%)' % (len(successes), 36 | len(traces), 100.0 * len(successes) / len(traces)) 37 | 38 | print u'Mean path length: %.02f (σ = %.02f)' % basic_stats(path_lengths) 39 | 40 | #----------------------------------------------------------------------------# 41 | 42 | def _create_option_parser(): 43 | usage = \ 44 | """%prog [options] input_file 45 | 46 | Generates evaluation statistics on a collection of traces.""" 47 | 48 | parser = optparse.OptionParser(usage) 49 | 50 | return parser 51 | 52 | def main(argv): 53 | parser = _create_option_parser() 54 | (options, args) = parser.parse_args(argv) 55 | 56 | if len(args) != 1: 57 | parser.print_help() 58 | sys.exit(1) 59 | 60 | input_file, = args 61 | evaluate_paths(input_file) 62 | 63 | #----------------------------------------------------------------------------# 64 | 65 | if __name__ == '__main__': 66 | main(sys.argv[1:]) 67 | 68 | # vim: ts=4 sw=4 sts=4 et tw=78: 69 | -------------------------------------------------------------------------------- /simsearch/experiments/check_connectivity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # check_connectivity.py 5 | # simsearch 6 | # 7 | # Created by Lars Yencken on 03-09-2010. 8 | # Copyright 2010 Lars Yencken. All rights reserved. 9 | # 10 | 11 | """ 12 | Performs a connectivity check on the search graph, determining how many kanji 13 | occur within the top-k neighbour list of at least one other kanji. 14 | """ 15 | 16 | import sys 17 | import optparse 18 | 19 | from simsearch import settings 20 | from simsearch.search import models 21 | 22 | def check_connectivity(k=settings.N_NEIGHBOURS_RECALLED): 23 | kanji_set = models.Node.get_coverage() 24 | covered_set = set() 25 | for node in models.Node.objects: 26 | covered_set.update(n.kanji for n in node.neighbours[:k]) 27 | 28 | print '%d/%d (%.02f%%) covered' % (len(covered_set), len(kanji_set), 29 | 100.0 * len(covered_set) / len(kanji_set)) 30 | 31 | #----------------------------------------------------------------------------# 32 | 33 | def _create_option_parser(): 34 | usage = \ 35 | """%prog [options] 36 | 37 | Performs a connectivity check on the neighbour database to determine how 38 | many kanji are realistically accessible.""" 39 | 40 | parser = optparse.OptionParser(usage) 41 | 42 | parser.add_option('-k', action='store', dest='k', type='int', 43 | default=settings.N_NEIGHBOURS_RECALLED, 44 | help='Connected kanji must occur within top-k neighbours [%d]' \ 45 | % settings.N_NEIGHBOURS_RECALLED) 46 | 47 | return parser 48 | 49 | def main(argv): 50 | parser = _create_option_parser() 51 | (options, args) = parser.parse_args(argv) 52 | 53 | if args: 54 | parser.print_help() 55 | sys.exit(1) 56 | 57 | check_connectivity(k=options.k) 58 | 59 | #----------------------------------------------------------------------------# 60 | 61 | if __name__ == '__main__': 62 | main(sys.argv[1:]) 63 | 64 | # vim: ts=4 sw=4 sts=4 et tw=78: 65 | -------------------------------------------------------------------------------- /simsearch/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # settings.py 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 24-08-2010. 7 | # Copyright 2010 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | Settings for the simsearch project. 12 | """ 13 | 14 | import os 15 | 16 | import mongoengine 17 | 18 | # custom MongoDB connection settings 19 | MONGODB_NAME = 'simsearch' 20 | MONGODB_USERNAME = None 21 | MONGODB_PASSWORD = None 22 | MONGODB_HOST = 'localhost' 23 | MONGODB_PORT = 27017 24 | 25 | UTF8_BYTES_PER_CHAR = 3 # for CJK chars 26 | 27 | N_NEIGHBOURS_STORED = 100 28 | 29 | N_NEIGHBOURS_RECALLED = 15 30 | 31 | GOOGLE_ANALYTICS_CODE = None 32 | 33 | # Tradeoff in Pr(a|s) and likelihood of reaching a further target from s' 34 | UPDATE_GAMMA = 0.7 35 | 36 | PROJECT_ROOT = os.path.dirname(__file__) 37 | 38 | # Absolute path to the directory that holds media. 39 | # Example: "/home/media/media.lawrence.com/" 40 | MEDIA_ROOT = os.path.join(PROJECT_ROOT, 'media') 41 | 42 | # URL that handles the media served from MEDIA_ROOT. Make sure to use a 43 | # trailing slash if there is a path component (optional in other cases). 44 | # Examples: "http://media.lawrence.com", "http://example.com/media/" 45 | MEDIA_URL = '/static/' 46 | 47 | # The source of stroke data for each character 48 | STROKE_SOURCE = None 49 | 50 | # The source of frequency counts for each character 51 | FREQ_SOURCE = None 52 | 53 | try: 54 | from local_settings import * 55 | except ImportError: 56 | pass 57 | 58 | # connect to our database 59 | import mongoengine 60 | mongoengine.connect(MONGODB_NAME, username=MONGODB_USERNAME, 61 | password=MONGODB_PASSWORD, host=MONGODB_HOST, port=MONGODB_PORT) 62 | 63 | # static data files needed for building 64 | DATA_DIR = os.path.join(PROJECT_ROOT, 'data') 65 | 66 | # default stroke source 67 | if STROKE_SOURCE is None: 68 | STROKE_SOURCE = os.path.join(DATA_DIR, 'strokes_ulrich') 69 | 70 | # default frequency source 71 | if FREQ_SOURCE is None: 72 | FREQ_SOURCE = os.path.join(DATA_DIR, 'jp_char_corpus_counts.gz') 73 | 74 | # vim: ts=4 sw=4 sts=4 et tw=78: 75 | -------------------------------------------------------------------------------- /simsearch/heap_cache.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # heap_cache.py 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 30-08-2010. 7 | # Copyright 2010 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | Caches to aid similarity caculation, to efficiently maintain only the highest 12 | similarity neighbours. 13 | """ 14 | 15 | import heapq 16 | 17 | class TopNHeap(object): 18 | "A heap which only keeps the top-n items and their weights." 19 | __slots__ = '_n', '_backing_list' 20 | def __init__(self, n): 21 | self._n = n 22 | self._backing_list = [] 23 | 24 | def add(self, item, weight): 25 | heapq.heappush(self._backing_list, (weight, item)) 26 | if len(self._backing_list) > self._n: 27 | heapq.heappop(self._backing_list) 28 | 29 | def get_contents(self): 30 | return self._backing_list 31 | 32 | class FixedSimilarityCache(object): 33 | """ 34 | A kanji similarity cache which only keeps the top-n most similar 35 | neighbours. 36 | """ 37 | def __init__(self, n): 38 | self._n = n 39 | self._heaps = {} 40 | self._sum = 0.0 41 | self._n_seen = 0.0 42 | self._sum_squared = 0.0 43 | 44 | def add(self, kanji_a, kanji_b, similarity): 45 | """ 46 | Attempt to add this similarity score to the cache. If there are 47 | already n closer neighbours for either kanji it will be discarded. 48 | """ 49 | self.get_heap(kanji_a).add(kanji_b, similarity) 50 | self.get_heap(kanji_b).add(kanji_a, similarity) 51 | self._n_seen += 1 52 | self._sum += similarity 53 | self._sum_squared += similarity * similarity 54 | 55 | 56 | def __getitem__(self, kanji): 57 | return self.get_heap(kanji) 58 | 59 | def get_heap(self, kanji): 60 | heap = self._heaps.get(kanji) 61 | if heap is None: 62 | heap = self._heaps.setdefault(kanji, TopNHeap(self._n)) 63 | return heap 64 | 65 | def get_mean(self): 66 | return self._sum / self._n_seen 67 | 68 | # vim: ts=4 sw=4 sts=4 et tw=78: 69 | 70 | -------------------------------------------------------------------------------- /simsearch/templates/base.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | Visual kanji search 9 | 11 | 13 | 17 | 19 | {% block headers %} 20 | {% endblock %} 21 | 22 | 23 | 24 | 25 | {% block navigation %} 26 | 32 | {% endblock %} 33 | 34 | {% block content %} 35 | {% endblock %} 36 | 37 | {% block copy %} 38 |

39 | Visual kanji search (r{{revision.number}}:{{revision.short}}) 41 |

42 | {% endblock %} 43 | 44 | {% if settings.GOOGLE_ANALYTICS_CODE %} 45 | 56 | {% endif %} 57 | 58 | 59 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/src/print.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | print.css 4 | * Gives you some sensible styles for printing pages. 5 | * See Readme file in this directory for further instructions. 6 | 7 | Some additions you'll want to make, customized to your markup: 8 | #header, #footer, #navigation { display:none; } 9 | 10 | -------------------------------------------------------------- */ 11 | 12 | body { 13 | line-height: 1.5; 14 | font-family: "Helvetica Neue", Arial, Helvetica, sans-serif; 15 | color:#000; 16 | background: none; 17 | font-size: 10pt; 18 | } 19 | 20 | 21 | /* Layout 22 | -------------------------------------------------------------- */ 23 | 24 | .container { 25 | background: none; 26 | } 27 | 28 | hr { 29 | background:#ccc; 30 | color:#ccc; 31 | width:100%; 32 | height:2px; 33 | margin:2em 0; 34 | padding:0; 35 | border:none; 36 | } 37 | hr.space { 38 | background: #fff; 39 | color: #fff; 40 | visibility: hidden; 41 | } 42 | 43 | 44 | /* Text 45 | -------------------------------------------------------------- */ 46 | 47 | h1,h2,h3,h4,h5,h6 { font-family: "Helvetica Neue", Arial, "Lucida Grande", sans-serif; } 48 | code { font:.9em "Courier New", Monaco, Courier, monospace; } 49 | 50 | a img { border:none; } 51 | p img.top { margin-top: 0; } 52 | 53 | blockquote { 54 | margin:1.5em; 55 | padding:1em; 56 | font-style:italic; 57 | font-size:.9em; 58 | } 59 | 60 | .small { font-size: .9em; } 61 | .large { font-size: 1.1em; } 62 | .quiet { color: #999; } 63 | .hide { display:none; } 64 | 65 | 66 | /* Links 67 | -------------------------------------------------------------- */ 68 | 69 | a:link, a:visited { 70 | background: transparent; 71 | font-weight:700; 72 | text-decoration: underline; 73 | } 74 | 75 | a:link:after, a:visited:after { 76 | content: " (" attr(href) ")"; 77 | font-size: 90%; 78 | } 79 | 80 | /* If you're having trouble printing relative links, uncomment and customize this: 81 | (note: This is valid CSS3, but it still won't go through the W3C CSS Validator) */ 82 | 83 | /* a[href^="/"]:after { 84 | content: " (http://www.yourdomain.com" attr(href) ") "; 85 | } */ 86 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/src/forms.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | forms.css 4 | * Sets up some default styling for forms 5 | * Gives you classes to enhance your forms 6 | 7 | Usage: 8 | * For text fields, use class .title or .text 9 | * For inline forms, use .inline (even when using columns) 10 | 11 | -------------------------------------------------------------- */ 12 | 13 | label { font-weight: bold; } 14 | fieldset { padding:1.4em; margin: 0 0 1.5em 0; border: 1px solid #ccc; } 15 | legend { font-weight: bold; font-size:1.2em; } 16 | 17 | 18 | /* Form fields 19 | -------------------------------------------------------------- */ 20 | 21 | input[type=text], input[type=password], 22 | input.text, input.title, 23 | textarea, select { 24 | background-color:#fff; 25 | border:1px solid #bbb; 26 | } 27 | input[type=text]:focus, input[type=password]:focus, 28 | input.text:focus, input.title:focus, 29 | textarea:focus, select:focus { 30 | border-color:#666; 31 | } 32 | 33 | input[type=text], input[type=password], 34 | input.text, input.title, 35 | textarea, select { 36 | margin:0.5em 0; 37 | } 38 | 39 | input.text, 40 | input.title { width: 300px; padding:5px; } 41 | input.title { font-size:1.5em; } 42 | textarea { width: 390px; height: 250px; padding:5px; } 43 | 44 | input[type=checkbox], input[type=radio], 45 | input.checkbox, input.radio { 46 | position:relative; top:.25em; 47 | } 48 | 49 | form.inline { line-height:3; } 50 | form.inline p { margin-bottom:0; } 51 | 52 | 53 | /* Success, notice and error boxes 54 | -------------------------------------------------------------- */ 55 | 56 | .error, 57 | .notice, 58 | .success, 59 | .info { padding: 0.8em; margin-bottom: 1em; border: 2px solid #ddd; } 60 | 61 | .error { background: #fbe3e4; color: #8a1f11; border-color: #fbc2c4; } 62 | .notice { background: #fff6bf; color: #514721; border-color: #ffd324; } 63 | .success { background: #e6efc2; color: #264409; border-color: #c6d880; } 64 | .info { background: #d5edf8; color: #205791; border-color: #92cae4; } 65 | .error a { color: #8a1f11; } 66 | .notice a { color: #514721; } 67 | .success a { color: #264409; } 68 | .info a { color: #205791; } 69 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/buttons/screen.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | buttons.css 4 | * Gives you some great CSS-only buttons. 5 | 6 | Created by Kevin Hale [particletree.com] 7 | * particletree.com/features/rediscovering-the-button-element 8 | 9 | See Readme.txt in this folder for instructions. 10 | 11 | -------------------------------------------------------------- */ 12 | 13 | a.button, button { 14 | display:block; 15 | float:left; 16 | margin: 0.7em 0.5em 0.7em 0; 17 | padding:5px 10px 5px 7px; /* Links */ 18 | 19 | border:1px solid #dedede; 20 | border-top:1px solid #eee; 21 | border-left:1px solid #eee; 22 | 23 | background-color:#f5f5f5; 24 | font-family:"Lucida Grande", Tahoma, Arial, Verdana, sans-serif; 25 | font-size:100%; 26 | line-height:130%; 27 | text-decoration:none; 28 | font-weight:bold; 29 | color:#565656; 30 | cursor:pointer; 31 | } 32 | button { 33 | width:auto; 34 | overflow:visible; 35 | padding:4px 10px 3px 7px; /* IE6 */ 36 | } 37 | button[type] { 38 | padding:4px 10px 4px 7px; /* Firefox */ 39 | line-height:17px; /* Safari */ 40 | } 41 | *:first-child+html button[type] { 42 | padding:4px 10px 3px 7px; /* IE7 */ 43 | } 44 | button img, a.button img{ 45 | margin:0 3px -3px 0 !important; 46 | padding:0; 47 | border:none; 48 | width:16px; 49 | height:16px; 50 | float:none; 51 | } 52 | 53 | 54 | /* Button colors 55 | -------------------------------------------------------------- */ 56 | 57 | /* Standard */ 58 | button:hover, a.button:hover{ 59 | background-color:#dff4ff; 60 | border:1px solid #c2e1ef; 61 | color:#336699; 62 | } 63 | a.button:active{ 64 | background-color:#6299c5; 65 | border:1px solid #6299c5; 66 | color:#fff; 67 | } 68 | 69 | /* Positive */ 70 | body .positive { 71 | color:#529214; 72 | } 73 | a.positive:hover, button.positive:hover { 74 | background-color:#E6EFC2; 75 | border:1px solid #C6D880; 76 | color:#529214; 77 | } 78 | a.positive:active { 79 | background-color:#529214; 80 | border:1px solid #529214; 81 | color:#fff; 82 | } 83 | 84 | /* Negative */ 85 | body .negative { 86 | color:#d12f19; 87 | } 88 | a.negative:hover, button.negative:hover { 89 | background-color:#fbe3e4; 90 | border:1px solid #fbc2c4; 91 | color:#d12f19; 92 | } 93 | a.negative:active { 94 | background-color:#d12f19; 95 | border:1px solid #d12f19; 96 | color:#fff; 97 | } 98 | -------------------------------------------------------------------------------- /simsearch/templates/translate/kanji.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block headers %} 4 | 6 | 8 | 12 | 13 | 29 | {% endblock %} 30 | 31 | {% block content %} 32 |
33 |
{{translation.kanji}}
34 |
35 |
36 |

Readings

37 | 38 | 39 | 40 | 41 | 46 | 47 | 48 | 49 | 50 | 55 | 56 | 57 |
On 42 | {% for r in translation.on_readings %} 43 | {{r}}{% if not loop.last %},{% endif %} 44 | {% endfor %} 45 |
Kun 51 | {% for r in translation.kun_readings %} 52 | {{r}}{% if not loop.last %},{% endif %} 53 | {% endfor %} 54 |
58 | 59 |

Meaning

60 | 61 |
    62 | {% for v in translation.glosses %} 63 |
  • {{v}}
  • 64 | {% endfor %} 65 |
66 | 67 |

Find words

68 | 73 |
74 |
75 | {% endblock %} 76 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/fancy-type/screen.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | fancy-type.css 4 | * Lots of pretty advanced classes for manipulating text. 5 | 6 | See the Readme file in this folder for additional instructions. 7 | 8 | -------------------------------------------------------------- */ 9 | 10 | /* Indentation instead of line shifts for sibling paragraphs. */ 11 | p + p { text-indent:2em; margin-top:-1.5em; } 12 | form p + p { text-indent: 0; } /* Don't want this in forms. */ 13 | 14 | 15 | /* For great looking type, use this code instead of asdf: 16 | asdf 17 | Best used on prepositions and ampersands. */ 18 | 19 | .alt { 20 | color: #666; 21 | font-family: "Warnock Pro", "Goudy Old Style","Palatino","Book Antiqua", Georgia, serif; 22 | font-style: italic; 23 | font-weight: normal; 24 | } 25 | 26 | 27 | /* For great looking quote marks in titles, replace "asdf" with: 28 | asdf” 29 | (That is, when the title starts with a quote mark). 30 | (You may have to change this value depending on your font size). */ 31 | 32 | .dquo { margin-left: -.5em; } 33 | 34 | 35 | /* Reduced size type with incremental leading 36 | (http://www.markboulton.co.uk/journal/comments/incremental_leading/) 37 | 38 | This could be used for side notes. For smaller type, you don't necessarily want to 39 | follow the 1.5x vertical rhythm -- the line-height is too much. 40 | 41 | Using this class, it reduces your font size and line-height so that for 42 | every four lines of normal sized type, there is five lines of the sidenote. eg: 43 | 44 | New type size in em's: 45 | 10px (wanted side note size) / 12px (existing base size) = 0.8333 (new type size in ems) 46 | 47 | New line-height value: 48 | 12px x 1.5 = 18px (old line-height) 49 | 18px x 4 = 72px 50 | 72px / 5 = 14.4px (new line height) 51 | 14.4px / 10px = 1.44 (new line height in em's) */ 52 | 53 | p.incr, .incr p { 54 | font-size: 10px; 55 | line-height: 1.44em; 56 | margin-bottom: 1.5em; 57 | } 58 | 59 | 60 | /* Surround uppercase words and abbreviations with this class. 61 | Based on work by Jørgen Arnor Gårdsø Lom [http://twistedintellect.com/] */ 62 | 63 | .caps { 64 | font-variant: small-caps; 65 | letter-spacing: 1px; 66 | text-transform: lowercase; 67 | font-size:1.2em; 68 | line-height:1%; 69 | font-weight:bold; 70 | padding:0 2px; 71 | } 72 | -------------------------------------------------------------------------------- /simsearch/templates/static/about.html: -------------------------------------------------------------------------------- 1 | {% extends "static/base.html" %} 2 | 3 | {% block inner_content %} 4 |

About SimSearch

5 | 6 |

What is it?

7 | 8 |

SimSearch is a visual search-by-similarity for Japanese kanji. Suppose you 9 | encounter a kanji you don't know, but it looks very similar to one you do know. 10 | In this case, you can enter in the one you know as a query, and navigate 11 | through the similarity space until you find the kanji you are looking for.

12 | 13 |

How does it work?

14 | 15 |

In order to determine how similar two kanji are to one another, we use the 16 | stroke edit distance between the two kanji [1]. In other words, we 17 | look at the sequence of strokes used to write each character, and determine how 18 | many changes you'd need to make to turn one kanji's series of strokes into that 19 | of the other kanji. Research so far has that this measure best matches human 20 | judgements of similarity [2].

21 | 22 |

Secondly, search is also adaptive. That is, it will adapt to whatever people 23 | actually find similar when they do searches using the system. For this, we use 24 | Q-learning [3], a well known algorithm for learning the best action to take in 25 | a state space. Q-learning has been used in a wide variety of search 26 | applications, including game players for board games which learn from 27 | experience.

28 | 29 |

Fortunately, systems such as this do not need their own custom dictionaries 30 | of Japanese. For kanji translations and pronunciation, we use the excellent 31 | and free Kanjidic dictionary [4], and for layout we use RaphaelJS [5].

32 | 33 |

How can I help?

34 | 35 |

If you're a learner of Japanese, or a native speaker, please give the system 36 | a try, and send me some feedback. If you're a 37 | programmer, note that SimSearch is open source, so feel free to suggest new 38 | improvements, or even try running your own site.

39 | 40 |
41 | 42 |

References

43 |
    44 |
  1. Wikipedia: Levenshtein 46 | distance
  2. 47 |
  3. Yencken, Lars and Baldwin, Timothy: “Measuring and predicting orthographic 48 | associations: modelling the similarity of Japanese kanji”, in Proceedings of 49 | COLING 2008, Manchester, UK (2008)
  4. 50 |
  5. Wikipedia: Q-learning
  6. 52 |
  7. 53 | The KANJIDIC Home Page (license) 54 |
  8. 55 |
  9. 56 | Raphaël -- JavaScript Library 57 |
  10. 58 |
59 | {% endblock %} 60 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/src/ie.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | ie.css 4 | 5 | Contains every hack for Internet Explorer, 6 | so that our core files stay sweet and nimble. 7 | 8 | -------------------------------------------------------------- */ 9 | 10 | /* Make sure the layout is centered in IE5 */ 11 | body { text-align: center; } 12 | .container { text-align: left; } 13 | 14 | /* Fixes IE margin bugs */ 15 | * html .column, * html .span-1, * html .span-2, 16 | * html .span-3, * html .span-4, * html .span-5, 17 | * html .span-6, * html .span-7, * html .span-8, 18 | * html .span-9, * html .span-10, * html .span-11, 19 | * html .span-12, * html .span-13, * html .span-14, 20 | * html .span-15, * html .span-16, * html .span-17, 21 | * html .span-18, * html .span-19, * html .span-20, 22 | * html .span-21, * html .span-22, * html .span-23, 23 | * html .span-24 { display:inline; overflow-x: hidden; } 24 | 25 | 26 | /* Elements 27 | -------------------------------------------------------------- */ 28 | 29 | /* Fixes incorrect styling of legend in IE6. */ 30 | * html legend { margin:0px -8px 16px 0; padding:0; } 31 | 32 | /* Fixes wrong line-height on sup/sub in IE. */ 33 | sup { vertical-align:text-top; } 34 | sub { vertical-align:text-bottom; } 35 | 36 | /* Fixes IE7 missing wrapping of code elements. */ 37 | html>body p code { *white-space: normal; } 38 | 39 | /* IE 6&7 has problems with setting proper
margins. */ 40 | hr { margin:-8px auto 11px; } 41 | 42 | /* Explicitly set interpolation, allowing dynamically resized images to not look horrible */ 43 | img { -ms-interpolation-mode:bicubic; } 44 | 45 | /* Clearing 46 | -------------------------------------------------------------- */ 47 | 48 | /* Makes clearfix actually work in IE */ 49 | .clearfix, .container { display:inline-block; } 50 | * html .clearfix, 51 | * html .container { height:1%; } 52 | 53 | 54 | /* Forms 55 | -------------------------------------------------------------- */ 56 | 57 | /* Fixes padding on fieldset */ 58 | fieldset { padding-top:0; } 59 | 60 | /* Makes classic textareas in IE 6 resemble other browsers */ 61 | textarea { overflow:auto; } 62 | 63 | /* Fixes rule that IE 6 ignores */ 64 | input.text, input.title, textarea { background-color:#fff; border:1px solid #bbb; } 65 | input.text:focus, input.title:focus { border-color:#666; } 66 | input.text, input.title, textarea, select { margin:0.5em 0; } 67 | input.checkbox, input.radio { position:relative; top:.25em; } 68 | 69 | /* Fixes alignment of inline form elements */ 70 | form.inline div, form.inline p { vertical-align:middle; } 71 | form.inline label { position:relative;top:-0.25em; } 72 | form.inline input.checkbox, form.inline input.radio, 73 | form.inline input.button, form.inline button { 74 | margin:0.5em 0; 75 | } 76 | button, input.button { position:relative;top:0.25em; } 77 | -------------------------------------------------------------------------------- /simsearch/templates/static/help.html: -------------------------------------------------------------------------------- 1 | {% extends "static/base.html" %} 2 | 3 | {% block sub_headers %} 4 | 12 | {% endblock %} 13 | 14 | {% block inner_content %} 15 |

Help

16 | 17 |

Using similarity search

18 | 19 |

First of all, this site can be used as a basic kanji dictionary. Enter the 20 | kanji into the search box and press return, then click on it again, and you'll 21 | be taken to its translation. More interestingly though, it can also be used to 22 | find a kanji which you don't know how to type in. Here's how it 23 | works.

24 | 25 |

Suppose you ran into the kanji , which you hadn't seen before. It looks a lot like one 27 | you do know, from 働く 28 | hataraku "to work". If you enter as query, 29 | the search display will show you a number of kanji which look similar. 30 | Searching this way, you can quickly find the unknown kanji you want.

31 | 32 |

If you enter a query, and can't immediately see the kanji you're looking 33 | for in the results, you should click on the next closest match. By doing this 34 | a few times, you should hopefully find your target kanji.

35 | 36 |

Whilst this form of search is designed for single-kanji queries, you can 37 | also use it for words by querying any of the kanji in the word. When you find 38 | your target, its translation page provides single-click queries to word-level 39 | dictionaries.

40 | 41 |

Other good dictionaries

42 | 43 |

Whilst searching this way can be fun, it's not appropriate all the time. 44 | You might not know any similar kanji to the one you're trying to find, or you 45 | might just have a difficult one. The best strategy in general is to have 46 | a number of dictionaries you can use, and to pick the best one for the job. We 47 | recommend:

48 | 49 |
    50 |
  • 51 | Forgiving Online Kanji Search
    52 | A word-level dictionary interface for search-by-pronunciation which can correct for common misreadings. 53 |
  • 54 |
  • 55 | WWWJDIC
    56 | The reference-level free word-level dictionary for Japanese, which accepts roomaji input, and occasionally has examples of use. 57 |
  • 58 |
  • 59 | Kansuke 60 |
    61 |
    A kanji-level dictionary based on a simplified method of counting strokes.
    62 |
  • 63 |
  • 64 | Handwritten kanji lookup 65 |
    66 |
    Ben Bullock's handwritten kanji interface, which lets you find a kanji by drawing it.
    67 |
  • 68 |
69 | {% endblock %} 70 | -------------------------------------------------------------------------------- /simsearch/stroke.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # stroke.pyx 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 03-09-2010. 7 | # Copyright 2010 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | Optimised Levenstein distance calculation between stroke signatures for two 12 | kanji. 13 | """ 14 | 15 | import os 16 | 17 | from cjktools.common import sopen 18 | 19 | from simsearch import settings 20 | 21 | cdef class StrokeEditDistance: 22 | """The edit distance between stroke sequences for both kanji.""" 23 | cdef readonly signatures 24 | cdef readonly object stroke_types 25 | cdef readonly int n_stroke_types 26 | 27 | def __init__(self, input_file=None): 28 | self.stroke_types = {} 29 | self.n_stroke_types = 0 30 | 31 | input_file = input_file or settings.STROKE_SOURCE 32 | self.signatures = {} 33 | i_stream = sopen(input_file) 34 | for i, line in enumerate(i_stream): 35 | kanji, raw_strokes = line.rstrip().split() 36 | raw_strokes = raw_strokes.split(',') 37 | strokes = map(self.get_stroke_type, raw_strokes) 38 | self.signatures[kanji] = strokes 39 | i_stream.close() 40 | 41 | def get_stroke_type(self, stroke): 42 | try: 43 | return self.stroke_types[stroke] 44 | except KeyError: 45 | pass 46 | 47 | self.stroke_types[stroke] = self.n_stroke_types 48 | self.n_stroke_types = self.n_stroke_types + 1 49 | 50 | return self.n_stroke_types - 1 51 | 52 | def raw_distance(self, kanji_a, kanji_b): 53 | s_py = self.signatures[kanji_a] 54 | t_py = self.signatures[kanji_b] 55 | 56 | return edit_distance(s_py, t_py) 57 | 58 | def __call__(self, kanji_a, kanji_b): 59 | s_py = self.signatures[kanji_a] 60 | t_py = self.signatures[kanji_b] 61 | 62 | result = edit_distance(s_py, t_py) 63 | return float(result) / max(len(s_py), len(t_py)) 64 | 65 | def __contains__(self, kanji): 66 | return kanji in self.signatures 67 | 68 | #----------------------------------------------------------------------------# 69 | 70 | cdef edit_distance(s_py, t_py): 71 | cdef int m, n, i, j 72 | cdef int table[100][100] 73 | cdef int s[100] 74 | cdef int t[100] 75 | cdef int up, left, diag, cost 76 | 77 | s_len = len(s_py) 78 | t_len = len(t_py) 79 | if s_len > 99 or t_len > 99: 80 | raise ValueError, "stroke sequences too long" 81 | 82 | for 0 <= i < s_len: 83 | table[i][0] = i 84 | s[i] = s_py[i] 85 | table[s_len][0] = s_len 86 | 87 | for 0 <= j < t_len: 88 | table[0][j] = j 89 | t[j] = t_py[j] 90 | table[0][t_len] = t_len 91 | 92 | for 1 <= i <= s_len: 93 | for 1 <= j <= t_len: 94 | if s[i-1] == t[j-1]: 95 | cost = 0 96 | else: 97 | cost = 1 98 | 99 | up = table[i-1][j] + 1 100 | left = table[i][j-1] + 1 101 | diag = table[i-1][j-1] + cost 102 | if up <= left: 103 | if up <= diag: 104 | table[i][j] = up 105 | else: 106 | table[i][j] = diag 107 | else: 108 | if left <= diag: 109 | table[i][j] = left 110 | else: 111 | table[i][j] = diag 112 | 113 | return table[s_len][t_len] 114 | 115 | # vim: ts=4 sw=4 sts=4 et tw=78: 116 | -------------------------------------------------------------------------------- /SConstruct: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # SConstruct 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 27-08-2010. 7 | # Copyright 2010 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | Build instructions for the stroke similarity extension." 12 | """ 13 | 14 | #----------------------------------------------------------------------------# 15 | 16 | import os 17 | from distutils import sysconfig 18 | 19 | #----------------------------------------------------------------------------# 20 | 21 | # Default include path for python, version inspecific. 22 | #scons_python_version = sysconfig.get_config_var('VERSION') 23 | scons_python_version = '2.7' 24 | python_version = ARGUMENTS.get('python') or scons_python_version 25 | 26 | print 'Using Python %s' % python_version 27 | 28 | #----------------------------------------------------------------------------# 29 | 30 | def check_libraries(env): 31 | """ Check whether the correct libraries exist, and thus whether building 32 | is possible. 33 | """ 34 | # Detect OS X python installation, and attempt to correct for it. 35 | if os.uname()[0] == 'Darwin': 36 | env.Replace(SHLINKFLAGS='$LINKFLAGS -bundle -flat_namespace -undefined suppress') 37 | env.Replace(SHLIBSUFFIX='.so') 38 | if os.path.isdir('/opt/local'): 39 | env.Append( 40 | LIBPATH=['/opt/local/lib'], 41 | CPPPATH=['/opt/local/include'] 42 | ) 43 | 44 | # Detect the presence of necessary dependencies. 45 | conf = Configure(env) 46 | 47 | if not conf.CheckLibWithHeader('m', 'math.h', 'c'): 48 | print "Can't find standard math libraries." 49 | Exit(1) 50 | 51 | if not conf.CheckLibWithHeader('python%s' % python_version, 52 | 'Python.h', 'c'): 53 | print "Can't find python %s." % python_version 54 | Exit(1) 55 | 56 | env = conf.Finish() 57 | 58 | return env 59 | 60 | #----------------------------------------------------------------------------# 61 | # CONFIGURATION 62 | #----------------------------------------------------------------------------# 63 | 64 | # Set up the compilation environment. 65 | env = Environment( 66 | CPPPATH=sysconfig.get_python_inc().replace(scons_python_version, 67 | python_version), 68 | LIBPATH=[sysconfig.get_config_var('LIBPL').replace( 69 | scons_python_version, python_version)], 70 | SHLIBPREFIX='', 71 | LIBS=['python%s' % python_version], 72 | ) 73 | 74 | environmentVars = ( 75 | 'CPATH', 76 | 'LD_LIBRARY_PATH', 77 | 'LIBRARY_PATH', 78 | 'PATH', 79 | 'PYTHONPATH', 80 | ) 81 | 82 | envDict = env['ENV'] 83 | for var in environmentVars: 84 | if var in os.environ: 85 | envDict[var] = os.environ[var] 86 | 87 | # Choose between debugging or optimized mode. 88 | if ARGUMENTS.get('debug'): 89 | print 'Using debug targets' 90 | env.Replace(DEBUG=True, CXXFLAGS='-O0 -g -Wall ', CFLAGS='-O0 -g -Wall ') 91 | else: 92 | print 'Using optimised targets' 93 | env.Replace(DEBUG=False, CXXFLAGS='-O3 -DNDEBUG -Wall ', 94 | CFLAGS='-O3 -DNDEBUG -Wall ') 95 | 96 | # Configure the environment. 97 | env = check_libraries(env) 98 | 99 | pyxbuild = Builder(action='cython -o $TARGET $SOURCE') 100 | env.Append(BUILDERS={'Cython': pyxbuild}) 101 | 102 | #----------------------------------------------------------------------------# 103 | 104 | SConscript('simsearch/SConscript', exports='env') 105 | 106 | #----------------------------------------------------------------------------# 107 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/src/typography.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | typography.css 4 | * Sets up some sensible default typography. 5 | 6 | -------------------------------------------------------------- */ 7 | 8 | /* Default font settings. 9 | The font-size percentage is of 16px. (0.75 * 16px = 12px) */ 10 | html { font-size:100.01%; } 11 | body { 12 | font-size: 75%; 13 | color: #222; 14 | background: #fff; 15 | font-family: "Helvetica Neue", Arial, Helvetica, sans-serif; 16 | } 17 | 18 | 19 | /* Headings 20 | -------------------------------------------------------------- */ 21 | 22 | h1,h2,h3,h4,h5,h6 { font-weight: normal; color: #111; } 23 | 24 | h1 { font-size: 3em; line-height: 1; margin-bottom: 0.5em; } 25 | h2 { font-size: 2em; margin-bottom: 0.75em; } 26 | h3 { font-size: 1.5em; line-height: 1; margin-bottom: 1em; } 27 | h4 { font-size: 1.2em; line-height: 1.25; margin-bottom: 1.25em; } 28 | h5 { font-size: 1em; font-weight: bold; margin-bottom: 1.5em; } 29 | h6 { font-size: 1em; font-weight: bold; } 30 | 31 | h1 img, h2 img, h3 img, 32 | h4 img, h5 img, h6 img { 33 | margin: 0; 34 | } 35 | 36 | 37 | /* Text elements 38 | -------------------------------------------------------------- */ 39 | 40 | p { margin: 0 0 1.5em; } 41 | p img.left { float: left; margin: 1.5em 1.5em 1.5em 0; padding: 0; } 42 | p img.right { float: right; margin: 1.5em 0 1.5em 1.5em; } 43 | 44 | a:focus, 45 | a:hover { color: #08d; } 46 | a { color: #009; text-decoration: underline; } 47 | 48 | blockquote { margin: 1.5em; color: #666; font-style: italic; } 49 | strong { font-weight: bold; } 50 | em,dfn { font-style: italic; } 51 | dfn { font-weight: bold; } 52 | sup, sub { line-height: 0; } 53 | 54 | abbr, 55 | acronym { border-bottom: 1px dotted #666; } 56 | address { margin: 0 0 1.5em; font-style: italic; } 57 | del { color:#666; } 58 | 59 | pre { margin: 1.5em 0; white-space: pre; } 60 | pre,code,tt { font: 1em 'andale mono', 'lucida console', monospace; line-height: 1.5; } 61 | 62 | 63 | /* Lists 64 | -------------------------------------------------------------- */ 65 | 66 | li ul, 67 | li ol { margin: 0; } 68 | ul, ol { margin: 0 1.5em 1.5em 0; padding-left: 3.333em; } 69 | 70 | ul { list-style-type: disc; } 71 | ol { list-style-type: decimal; } 72 | 73 | dl { margin: 0 0 1.5em 0; } 74 | dl dt { font-weight: bold; } 75 | dd { margin-left: 1.5em;} 76 | 77 | 78 | /* Tables 79 | -------------------------------------------------------------- */ 80 | 81 | table { margin-bottom: 1.4em; width:100%; } 82 | th { font-weight: bold; } 83 | thead th { background: #c3d9ff; } 84 | th,td,caption { padding: 4px 10px 4px 5px; } 85 | tr.even td { background: #e5ecf9; } 86 | tfoot { font-style: italic; } 87 | caption { background: #eee; } 88 | 89 | 90 | /* Misc classes 91 | -------------------------------------------------------------- */ 92 | 93 | .small { font-size: .8em; margin-bottom: 1.875em; line-height: 1.875em; } 94 | .large { font-size: 1.2em; line-height: 2.5em; margin-bottom: 1.25em; } 95 | .hide { display: none; } 96 | 97 | .quiet { color: #666; } 98 | .loud { color: #000; } 99 | .highlight { background:#ff0; } 100 | .added { background:#060; color: #fff; } 101 | .removed { background:#900; color: #fff; } 102 | 103 | .first { margin-left:0; padding-left:0; } 104 | .last { margin-right:0; padding-right:0; } 105 | .top { margin-top:0; padding-top:0; } 106 | .bottom { margin-bottom:0; padding-bottom:0; } 107 | -------------------------------------------------------------------------------- /simsearch/experiments/simulate_accessibility.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # simulate_accessibility.py 5 | # simsearch 6 | # 7 | # Created by Lars Yencken on 05-09-2010. 8 | # Copyright 2010 Lars Yencken. All rights reserved. 9 | # 10 | 11 | """ 12 | A basic simulation of accessibility improvements estimated from use of visual 13 | similarity search. 14 | """ 15 | 16 | import os 17 | import sys 18 | import optparse 19 | import codecs 20 | import random 21 | 22 | from simplestats import FreqDist, basic_stats 23 | 24 | from simsearch import settings 25 | from simsearch.search import models 26 | 27 | DEFAULT_THRESHOLD = 0.95 28 | 29 | def simulate_accessibility(output_file, threshold=DEFAULT_THRESHOLD): 30 | print 'Loading frequency distribution' 31 | dist = FreqDist.from_file(settings.FREQ_SOURCE) 32 | 33 | print 'Loading kanji' 34 | kanji_set = list(models._get_kanji()) 35 | random.seed(123456789) 36 | random.shuffle(kanji_set) 37 | 38 | kanji_in_order = sorted(kanji_set, key=lambda k: dist.prob(k)) 39 | 40 | print 'Loading graph' 41 | graph = RestrictedGraph() 42 | 43 | print 'Dumping frequencies to %s' % os.path.basename(output_file) 44 | n_neighbours = [] 45 | with codecs.open(output_file, 'w', 'utf8') as ostream: 46 | print >> ostream, u'#n_known,n_accessible' 47 | print >> ostream, u'%d,%d' % (0, 0) 48 | known_set = set() 49 | accessible_set = set() 50 | for i, kanji in enumerate(kanji_in_order): 51 | known_set.add(kanji) 52 | accessible_set.add(kanji) 53 | 54 | neighbours = graph[kanji] 55 | accessible_set.update(neighbours) 56 | n_neighbours.append(len(neighbours)) 57 | 58 | if (i + 1) % 50 == 0: 59 | print >> ostream, u'%d,%d' % (len(known_set), 60 | len(accessible_set)) 61 | print >> ostream, u'%d,%d' % (len(known_set), len(accessible_set)) 62 | 63 | print 'Average neighbourhood size: %.02f (σ = %.02f)' % \ 64 | basic_stats(n_neighbours) 65 | 66 | class RestrictedGraph(object): 67 | def __init__(self, threshold=DEFAULT_THRESHOLD): 68 | self._graph = models.Similarity.load() 69 | self._threshold = threshold 70 | 71 | def __getitem__(self, kanji): 72 | neighbour_heap = self._graph[kanji] 73 | ordered_neighbourhood = sorted(neighbour_heap.get_contents(), 74 | reverse=True) 75 | 76 | first_sim, first_neighbour = ordered_neighbourhood[0] 77 | cutoff_neighbours = set(n for (s, n) in ordered_neighbourhood 78 | if s >= self._threshold * first_sim) 79 | 80 | return cutoff_neighbours 81 | 82 | #----------------------------------------------------------------------------# 83 | 84 | def _create_option_parser(): 85 | usage = \ 86 | """%prog [options] output_file.csv 87 | 88 | Simulates how many kanji are accessible as kanji are learned, assuming they 89 | are studied in frequency order.""" 90 | 91 | parser = optparse.OptionParser(usage) 92 | 93 | parser.add_option('-t', action='store', dest='threshold', 94 | default=DEFAULT_THRESHOLD, type='float', 95 | help='The neighbourhood cutoff threshold [%.02f]' % \ 96 | DEFAULT_THRESHOLD) 97 | 98 | return parser 99 | 100 | def main(argv): 101 | parser = _create_option_parser() 102 | (options, args) = parser.parse_args(argv) 103 | 104 | if len(args) != 1: 105 | parser.print_help() 106 | sys.exit(1) 107 | 108 | simulate_accessibility(args[0], threshold=options.threshold) 109 | 110 | #----------------------------------------------------------------------------# 111 | 112 | if __name__ == '__main__': 113 | main(sys.argv[1:]) 114 | 115 | # vim: ts=4 sw=4 sts=4 et tw=78: 116 | -------------------------------------------------------------------------------- /simsearch/templates/search/display.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block headers %} 4 | 5 | 6 | 7 | 8 | 125 | {% endblock %} 126 | 127 | -------------------------------------------------------------------------------- /simsearch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # __init__.py 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 2011-10-02. 7 | # Copyright 2011 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | The server for simsearch. 12 | """ 13 | 14 | import os 15 | 16 | import flask 17 | from cjktools import scripts 18 | import mercurial.hg 19 | import mercurial.ui 20 | import mercurial.node 21 | import simplejson 22 | import mongoengine 23 | 24 | import models 25 | 26 | app = flask.Flask(__name__) 27 | app.config.from_object('simsearch.settings') 28 | 29 | if 'SIMSEARCH_SETTINGS' in os.environ: 30 | app.config.from_envvar('SIMSEARCH_SETTINGS') 31 | 32 | @app.route('/help/') 33 | def help(): 34 | c = base_context() 35 | return flask.render_template('static/help.html', **c) 36 | 37 | @app.route('/feedback/') 38 | def feedback(): 39 | c = base_context() 40 | return flask.render_template("static/feedback.html", **c) 41 | 42 | @app.route('/about/') 43 | def about(): 44 | c = base_context() 45 | return flask.render_template("static/about.html", **c) 46 | 47 | @app.route('/') 48 | def index(): 49 | "Renders the search display." 50 | kanji = flask.request.args.get('kanji', '') 51 | kanji_ok = _is_kanji(kanji) 52 | context = base_context() 53 | 54 | context.update({ 55 | 'kanji': kanji, 56 | 'kanji_ok': kanji_ok, 57 | }) 58 | if not kanji or not kanji_ok: 59 | # show the search dialog 60 | if kanji: 61 | context['error'] = 'Please enter a single kanji only as input.' 62 | return flask.render_template('search/index.html', **context) 63 | 64 | try: 65 | node = models.Node.objects.get(pivot=kanji) 66 | except mongoengine.queryset.DoesNotExist: 67 | context['error'] = u'Sorry, %s not found' % kanji 68 | return flask.render_template('search/index.html', **context) 69 | 70 | # make sure the path is ok 71 | path = flask.request.args.get('path', '') 72 | if not all(map(_is_kanji, path)): 73 | path = [] 74 | 75 | path = list(path) + [kanji] 76 | neighbours = [n.kanji for n in sorted(node.neighbours, reverse=True)] 77 | neighbours = neighbours[:app.config['N_NEIGHBOURS_RECALLED']] 78 | 79 | context.update({'data': simplejson.dumps({ 80 | 'kanji': kanji, 81 | 'tier1': neighbours[:4], 82 | 'tier2': neighbours[4:9], 83 | 'tier3': neighbours[9:], 84 | 'path': ''.join(path), 85 | })}) 86 | return flask.render_template('search/display.html', **context) 87 | 88 | @app.route('/translate//') 89 | def translate(kanji): 90 | "Updates the query model before redirecting to the real translation." 91 | kanji = kanji or flask.request.args.get('kanji') 92 | if not _is_kanji(kanji): 93 | flask.abort(404) 94 | 95 | path = flask.request.args.get('path') 96 | if path and len(path) > 1 and all(map(_is_kanji, path)) \ 97 | and path.endswith(kanji): 98 | models.Node.update(path) 99 | models.Trace.log(flask.request, path) 100 | 101 | t = models.Translation.objects.get(kanji=kanji) 102 | if t is None: 103 | flask.abort(404) 104 | 105 | c = base_context() 106 | c['translation'] = t 107 | return flask.render_template('translate/kanji.html', **c) 108 | 109 | @app.route('/search//') 110 | def search_json(pivot): 111 | "Returns the search display data as JSON." 112 | pivot = pivot or flask.request.args.get('pivot') 113 | node = models.Node.objects.get(pivot=pivot) 114 | neighbours = [n.kanji for n in sorted(node.neighbours, reverse=True)] 115 | neighbours = neighbours[:app.conf['N_NEIGHBOURS_RECALLED']] 116 | 117 | return flask.jsonify( 118 | pivot_kanji=pivot, 119 | tier1=neighbours[:4], 120 | tier2=neighbours[4:9], 121 | tier3=neighbours[9:], 122 | ) 123 | 124 | def _is_kanji(kanji): 125 | return isinstance(kanji, unicode) and len(kanji) == 1 \ 126 | and scripts.script_type(kanji) == scripts.Script.Kanji 127 | 128 | def base_context(): 129 | c = {} 130 | c.update(mercurial_revision()) 131 | c.update(site_settings()) 132 | return c 133 | 134 | def mercurial_revision(): 135 | project_base = os.path.join(app.config['PROJECT_ROOT'], '..') 136 | repo = mercurial.hg.repository(mercurial.ui.ui(), project_base) 137 | fctx = repo.filectx(project_base, 'tip') 138 | 139 | return {'revision': { 140 | 'short': mercurial.node.short(fctx.node()), 141 | 'number': fctx.rev(), 142 | }} 143 | 144 | def site_settings(): 145 | return {'settings': app.config, 'MEDIA_URL': app.config['MEDIA_URL']} 146 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/plugins/rtl/screen.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | rtl.css 4 | * Mirrors Blueprint for left-to-right languages 5 | 6 | By Ran Yaniv Hartstein [ranh.co.il] 7 | 8 | -------------------------------------------------------------- */ 9 | 10 | body .container { direction: rtl; } 11 | body .column, body .span-1, body .span-2, body .span-3, body .span-4, body .span-5, body .span-6, body .span-7, body .span-8, body .span-9, body .span-10, body .span-11, body .span-12, body .span-13, body .span-14, body .span-15, body .span-16, body .span-17, body .span-18, body .span-19, body .span-20, body .span-21, body .span-22, body .span-23, body .span-24 { 12 | float: right; 13 | margin-right: 0; 14 | margin-left: 10px; 15 | text-align:right; 16 | } 17 | 18 | body div.last { margin-left: 0; } 19 | body table .last { padding-left: 0; } 20 | 21 | body .append-1 { padding-right: 0; padding-left: 40px; } 22 | body .append-2 { padding-right: 0; padding-left: 80px; } 23 | body .append-3 { padding-right: 0; padding-left: 120px; } 24 | body .append-4 { padding-right: 0; padding-left: 160px; } 25 | body .append-5 { padding-right: 0; padding-left: 200px; } 26 | body .append-6 { padding-right: 0; padding-left: 240px; } 27 | body .append-7 { padding-right: 0; padding-left: 280px; } 28 | body .append-8 { padding-right: 0; padding-left: 320px; } 29 | body .append-9 { padding-right: 0; padding-left: 360px; } 30 | body .append-10 { padding-right: 0; padding-left: 400px; } 31 | body .append-11 { padding-right: 0; padding-left: 440px; } 32 | body .append-12 { padding-right: 0; padding-left: 480px; } 33 | body .append-13 { padding-right: 0; padding-left: 520px; } 34 | body .append-14 { padding-right: 0; padding-left: 560px; } 35 | body .append-15 { padding-right: 0; padding-left: 600px; } 36 | body .append-16 { padding-right: 0; padding-left: 640px; } 37 | body .append-17 { padding-right: 0; padding-left: 680px; } 38 | body .append-18 { padding-right: 0; padding-left: 720px; } 39 | body .append-19 { padding-right: 0; padding-left: 760px; } 40 | body .append-20 { padding-right: 0; padding-left: 800px; } 41 | body .append-21 { padding-right: 0; padding-left: 840px; } 42 | body .append-22 { padding-right: 0; padding-left: 880px; } 43 | body .append-23 { padding-right: 0; padding-left: 920px; } 44 | 45 | body .prepend-1 { padding-left: 0; padding-right: 40px; } 46 | body .prepend-2 { padding-left: 0; padding-right: 80px; } 47 | body .prepend-3 { padding-left: 0; padding-right: 120px; } 48 | body .prepend-4 { padding-left: 0; padding-right: 160px; } 49 | body .prepend-5 { padding-left: 0; padding-right: 200px; } 50 | body .prepend-6 { padding-left: 0; padding-right: 240px; } 51 | body .prepend-7 { padding-left: 0; padding-right: 280px; } 52 | body .prepend-8 { padding-left: 0; padding-right: 320px; } 53 | body .prepend-9 { padding-left: 0; padding-right: 360px; } 54 | body .prepend-10 { padding-left: 0; padding-right: 400px; } 55 | body .prepend-11 { padding-left: 0; padding-right: 440px; } 56 | body .prepend-12 { padding-left: 0; padding-right: 480px; } 57 | body .prepend-13 { padding-left: 0; padding-right: 520px; } 58 | body .prepend-14 { padding-left: 0; padding-right: 560px; } 59 | body .prepend-15 { padding-left: 0; padding-right: 600px; } 60 | body .prepend-16 { padding-left: 0; padding-right: 640px; } 61 | body .prepend-17 { padding-left: 0; padding-right: 680px; } 62 | body .prepend-18 { padding-left: 0; padding-right: 720px; } 63 | body .prepend-19 { padding-left: 0; padding-right: 760px; } 64 | body .prepend-20 { padding-left: 0; padding-right: 800px; } 65 | body .prepend-21 { padding-left: 0; padding-right: 840px; } 66 | body .prepend-22 { padding-left: 0; padding-right: 880px; } 67 | body .prepend-23 { padding-left: 0; padding-right: 920px; } 68 | 69 | body .border { 70 | padding-right: 0; 71 | padding-left: 4px; 72 | margin-right: 0; 73 | margin-left: 5px; 74 | border-right: none; 75 | border-left: 1px solid #eee; 76 | } 77 | 78 | body .colborder { 79 | padding-right: 0; 80 | padding-left: 24px; 81 | margin-right: 0; 82 | margin-left: 25px; 83 | border-right: none; 84 | border-left: 1px solid #eee; 85 | } 86 | 87 | body .pull-1 { margin-left: 0; margin-right: -40px; } 88 | body .pull-2 { margin-left: 0; margin-right: -80px; } 89 | body .pull-3 { margin-left: 0; margin-right: -120px; } 90 | body .pull-4 { margin-left: 0; margin-right: -160px; } 91 | 92 | body .push-0 { margin: 0 18px 0 0; } 93 | body .push-1 { margin: 0 18px 0 -40px; } 94 | body .push-2 { margin: 0 18px 0 -80px; } 95 | body .push-3 { margin: 0 18px 0 -120px; } 96 | body .push-4 { margin: 0 18px 0 -160px; } 97 | body .push-0, body .push-1, body .push-2, 98 | body .push-3, body .push-4 { float: left; } 99 | 100 | 101 | /* Typography with RTL support */ 102 | body h1,body h2,body h3, 103 | body h4,body h5,body h6 { font-family: Arial, sans-serif; } 104 | html body { font-family: Arial, sans-serif; } 105 | body pre,body code,body tt { font-family: monospace; } 106 | 107 | /* Mirror floats and margins on typographic elements */ 108 | body p img { float: right; margin: 1.5em 0 1.5em 1.5em; } 109 | body dd, body ul, body ol { margin-left: 0; margin-right: 1.5em;} 110 | body td, body th { text-align:right; } 111 | -------------------------------------------------------------------------------- /simsearch/experiments/simulate_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # simulate_search.py 5 | # simsearch 6 | # 7 | # Created by Lars Yencken on 03-09-2010. 8 | # Copyright 2010 Lars Yencken. All rights reserved. 9 | # 10 | 11 | """ 12 | A script to simulate how users might search with the system. Query paths 13 | generated by this script can be analysed as a form of intrinsic evaluation. 14 | """ 15 | 16 | import os 17 | import sys 18 | import optparse 19 | import codecs 20 | import random 21 | 22 | from consoleLog import withProgress 23 | 24 | from simsearch import settings 25 | from simsearch.search import stroke, models 26 | 27 | def simulate_search(output_file, strategy='greedy', 28 | k=settings.N_NEIGHBOURS_RECALLED, error_rate=0.0): 29 | """ 30 | Simulate user searches on every query/target pair from the flashcard 31 | dataset, using one of the available strategies. The resulting query paths 32 | are dumped to the specified file. 33 | """ 34 | if strategy == 'greedy': 35 | search_fn = _greedy_search 36 | elif strategy == 'shortest': 37 | search_fn = _breadth_first_search 38 | elif strategy == 'random': 39 | random.seed(123456789) 40 | search_fn = _random_stumble 41 | else: 42 | raise ValueError(strategy) 43 | 44 | traces = [] 45 | for query, target in withProgress(_load_search_examples()): 46 | path = search_fn(query, target, k=k, error_rate=error_rate) 47 | traces.append((query, target, path)) 48 | 49 | TraceFile.save(traces, output_file) 50 | print 'Paths dumped to %s' % output_file 51 | 52 | class TraceFile(object): 53 | "A basic human-readable query path file format." 54 | @staticmethod 55 | def save(traces, filename): 56 | with codecs.open(filename, 'w', 'utf8') as ostream: 57 | print >> ostream, u"#query\ttarget\tvia" 58 | for query, target, path in traces: 59 | if path: 60 | assert path[0] == query 61 | # have at least a partial search 62 | if path[-1] == target: 63 | # success 64 | print >> ostream, u'%s\t%s\t[%s]' % (query, target, 65 | ''.join(path[1:-1])) 66 | else: 67 | # failure with partial path 68 | print >> ostream, u'%s\t(%s)\t[%s]' % (query, target, 69 | ''.join(path[1:])) 70 | 71 | else: 72 | # failure without partial path 73 | print >> ostream, u'%s\t(%s)\tNone' % (query, target) 74 | 75 | @staticmethod 76 | def load(filename): 77 | traces = [] 78 | with codecs.open(filename, 'r', 'utf8') as istream: 79 | header = istream.next() 80 | assert header.startswith('#') 81 | for line in istream: 82 | query, target, path = line.rstrip().split('\t') 83 | if len(target) != 1: 84 | target = target.strip('()') 85 | was_success = False 86 | assert len(target) == 1 87 | else: 88 | was_success = True 89 | 90 | if path == 'None': 91 | path = None 92 | else: 93 | path = [query] + list(path.strip('[]')) 94 | if was_success: 95 | path.append(target) 96 | 97 | traces.append((query, target, path)) 98 | 99 | return traces 100 | 101 | def _load_search_examples(): 102 | flashcard_file = os.path.join(settings.DATA_DIR, 'similarity', 'flashcard') 103 | results = [] 104 | with codecs.open(flashcard_file, 'r', 'utf8') as istream: 105 | for line in istream: 106 | _id, query, targets = line.split() 107 | for target in targets: 108 | results.append((query, target)) 109 | 110 | return results 111 | 112 | def _greedy_search(query, target, limit=5, k=settings.N_NEIGHBOURS_RECALLED, 113 | error_rate=0.0): 114 | """ 115 | Simulate a search between the query and target where the user always 116 | chooses the next kanji which looks closest to the target. 117 | """ 118 | assert query != target 119 | if query not in sed or target not in sed: 120 | # we can't simulate this search type without using a distance 121 | # heuristic 122 | return 123 | 124 | path = [query] 125 | while path[-1] != target and len(path) <= limit: 126 | assert path[0] == query 127 | 128 | new_query = path[-1] 129 | neighbours = _get_neighbours(new_query, k=k) 130 | 131 | if target in neighbours: 132 | if error_rate == 0.0 or random.random() < (1 - error_rate)**k: 133 | # Success! 134 | path.append(target) 135 | return path 136 | 137 | # Recognition error =( 138 | neighbours.remove(target) 139 | 140 | # Our options are neighbours we haven't tried yet 141 | options = neighbours.difference(path) 142 | 143 | if not options: 144 | # Search exhausted =( 145 | break 146 | 147 | # Choose the one visually most similar to the target 148 | _d, neighbour = min((sed(n, target), n) for n in options) 149 | path.append(neighbour) 150 | 151 | assert path[0] == query and path[-1] != target 152 | 153 | return path 154 | 155 | def _breadth_first_search(query, target, limit=5, 156 | k=settings.N_NEIGHBOURS_RECALLED, error_rate=0.0): 157 | """ 158 | Perform breadth first search to a fixed depth limit, returning the 159 | shortest path from the query to the target (within the limit). 160 | """ 161 | paths = [[query]] 162 | shortest = set([query]) # has a shortest path been checked 163 | while paths: 164 | current = paths.pop(0) 165 | current_query = current[-1] 166 | neighbours = _get_neighbours(current_query, k=k) 167 | 168 | if target in neighbours: 169 | current.append(target) 170 | assert current[0] == query 171 | return current 172 | 173 | if len(current) < limit: 174 | # visit in similarity order if possible 175 | try: 176 | neighbours = sorted(neighbours, key=lambda n: sed(n, target)) 177 | except KeyError: 178 | pass 179 | neighbours = [n for n in neighbours if n not in shortest] 180 | shortest.update(neighbours) 181 | paths.extend((current + [n]) for n in neighbours) 182 | 183 | def _random_stumble(query, target, limit=5, k=settings.N_NEIGHBOURS_RECALLED, 184 | error_rate=0.0): 185 | """ 186 | A worst-case simulation of user search, completely unguided by the 187 | target kanji (except for the initial query). 188 | """ 189 | path = [query] 190 | while len(path) <= limit: 191 | neighbours = _get_neighbours(path[-1], k=k) 192 | if target in neighbours: 193 | if error_rate == 0.0 or random.random() < (1 - error_rate)**k: 194 | return path + [target] 195 | 196 | neighbours.remove(target) 197 | 198 | path.append(random.choice(list(neighbours))) 199 | 200 | return path 201 | 202 | class cache(object): 203 | """ 204 | A simple cache wrapper whose contents never expire. Useful for reducing 205 | expensive calls on small datasets. 206 | """ 207 | def __init__(self, f): 208 | self.f = f 209 | self._cached = {} 210 | 211 | def __call__(self, *args, **kwargs): 212 | key = args + tuple(kwargs.items()) 213 | if key not in self._cached: 214 | self._cached[key] = self.f(*args, **kwargs) 215 | 216 | return self._cached[key] 217 | 218 | def __contains__(self, key): 219 | # workaround for StrokeEditDistance also acting like a container 220 | return self.f.__contains__(key) 221 | 222 | @cache 223 | def _get_neighbours(query, k=settings.N_NEIGHBOURS_RECALLED): 224 | neighbours = set(n.kanji for n in models.Node.objects.get( 225 | pivot=query).neighbours[:k]) 226 | return neighbours 227 | 228 | sed = cache(stroke.StrokeEditDistance()) 229 | 230 | #----------------------------------------------------------------------------# 231 | 232 | def _create_option_parser(): 233 | usage = \ 234 | """%prog [options] output_file 235 | 236 | Simulate queries through the search graph, dumping the traces to the given 237 | file.""" 238 | 239 | parser = optparse.OptionParser(usage) 240 | 241 | parser.add_option('--strategy', action='store', type='choice', 242 | choices=['greedy', 'shortest', 'random'], dest='strategy', 243 | default='greedy', 244 | help='The search strategy to use ([greedy]/shortest/random)') 245 | 246 | parser.add_option('-k', action='store', type='int', 247 | default=settings.N_NEIGHBOURS_RECALLED, dest='k', 248 | help='The number of neighbours displayed each query [%d]' % \ 249 | settings.N_NEIGHBOURS_RECALLED) 250 | 251 | parser.add_option('-e', action='store', type='float', 252 | default=0.0, dest='error_rate', 253 | help='Factor in an estimated recognition error rate [0.0]') 254 | 255 | return parser 256 | 257 | def main(argv): 258 | parser = _create_option_parser() 259 | (options, args) = parser.parse_args(argv) 260 | 261 | if len(args) != 1: 262 | parser.print_help() 263 | sys.exit(1) 264 | 265 | simulate_search(args[0], strategy=options.strategy, k=options.k, 266 | error_rate=options.error_rate) 267 | 268 | #----------------------------------------------------------------------------# 269 | 270 | if __name__ == '__main__': 271 | main(sys.argv[1:]) 272 | 273 | # vim: ts=4 sw=4 sts=4 et tw=78: 274 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/src/grid.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | 3 | grid.css 4 | * Sets up an easy-to-use grid of 24 columns. 5 | 6 | By default, the grid is 950px wide, with 24 columns 7 | spanning 30px, and a 10px margin between columns. 8 | 9 | If you need fewer or more columns, namespaces or semantic 10 | element names, use the compressor script (lib/compress.rb) 11 | 12 | -------------------------------------------------------------- */ 13 | 14 | /* A container should group all your columns. */ 15 | .container { 16 | width: 950px; 17 | margin: 0 auto; 18 | } 19 | 20 | /* Use this class on any .span / container to see the grid. */ 21 | .showgrid { 22 | background: url(src/grid.png); 23 | } 24 | 25 | 26 | /* Columns 27 | -------------------------------------------------------------- */ 28 | 29 | /* Sets up basic grid floating and margin. */ 30 | .column, .span-1, .span-2, .span-3, .span-4, .span-5, .span-6, .span-7, .span-8, .span-9, .span-10, .span-11, .span-12, .span-13, .span-14, .span-15, .span-16, .span-17, .span-18, .span-19, .span-20, .span-21, .span-22, .span-23, .span-24 { 31 | float: left; 32 | margin-right: 10px; 33 | } 34 | 35 | /* The last column in a row needs this class. */ 36 | .last { margin-right: 0; } 37 | 38 | /* Use these classes to set the width of a column. */ 39 | .span-1 {width: 30px;} 40 | 41 | .span-2 {width: 70px;} 42 | .span-3 {width: 110px;} 43 | .span-4 {width: 150px;} 44 | .span-5 {width: 190px;} 45 | .span-6 {width: 230px;} 46 | .span-7 {width: 270px;} 47 | .span-8 {width: 310px;} 48 | .span-9 {width: 350px;} 49 | .span-10 {width: 390px;} 50 | .span-11 {width: 430px;} 51 | .span-12 {width: 470px;} 52 | .span-13 {width: 510px;} 53 | .span-14 {width: 550px;} 54 | .span-15 {width: 590px;} 55 | .span-16 {width: 630px;} 56 | .span-17 {width: 670px;} 57 | .span-18 {width: 710px;} 58 | .span-19 {width: 750px;} 59 | .span-20 {width: 790px;} 60 | .span-21 {width: 830px;} 61 | .span-22 {width: 870px;} 62 | .span-23 {width: 910px;} 63 | .span-24 {width:950px; margin-right:0;} 64 | 65 | /* Use these classes to set the width of an input. */ 66 | input.span-1, textarea.span-1, input.span-2, textarea.span-2, input.span-3, textarea.span-3, input.span-4, textarea.span-4, input.span-5, textarea.span-5, input.span-6, textarea.span-6, input.span-7, textarea.span-7, input.span-8, textarea.span-8, input.span-9, textarea.span-9, input.span-10, textarea.span-10, input.span-11, textarea.span-11, input.span-12, textarea.span-12, input.span-13, textarea.span-13, input.span-14, textarea.span-14, input.span-15, textarea.span-15, input.span-16, textarea.span-16, input.span-17, textarea.span-17, input.span-18, textarea.span-18, input.span-19, textarea.span-19, input.span-20, textarea.span-20, input.span-21, textarea.span-21, input.span-22, textarea.span-22, input.span-23, textarea.span-23, input.span-24, textarea.span-24 { 67 | border-left-width: 1px; 68 | border-right-width: 1px; 69 | padding-left: 5px; 70 | padding-right: 5px; 71 | } 72 | 73 | input.span-1, textarea.span-1 { width: 18px; } 74 | input.span-2, textarea.span-2 { width: 58px; } 75 | input.span-3, textarea.span-3 { width: 98px; } 76 | input.span-4, textarea.span-4 { width: 138px; } 77 | input.span-5, textarea.span-5 { width: 178px; } 78 | input.span-6, textarea.span-6 { width: 218px; } 79 | input.span-7, textarea.span-7 { width: 258px; } 80 | input.span-8, textarea.span-8 { width: 298px; } 81 | input.span-9, textarea.span-9 { width: 338px; } 82 | input.span-10, textarea.span-10 { width: 378px; } 83 | input.span-11, textarea.span-11 { width: 418px; } 84 | input.span-12, textarea.span-12 { width: 458px; } 85 | input.span-13, textarea.span-13 { width: 498px; } 86 | input.span-14, textarea.span-14 { width: 538px; } 87 | input.span-15, textarea.span-15 { width: 578px; } 88 | input.span-16, textarea.span-16 { width: 618px; } 89 | input.span-17, textarea.span-17 { width: 658px; } 90 | input.span-18, textarea.span-18 { width: 698px; } 91 | input.span-19, textarea.span-19 { width: 738px; } 92 | input.span-20, textarea.span-20 { width: 778px; } 93 | input.span-21, textarea.span-21 { width: 818px; } 94 | input.span-22, textarea.span-22 { width: 858px; } 95 | input.span-23, textarea.span-23 { width: 898px; } 96 | input.span-24, textarea.span-24 { width: 938px; } 97 | 98 | /* Add these to a column to append empty cols. */ 99 | 100 | .append-1 { padding-right: 40px;} 101 | .append-2 { padding-right: 80px;} 102 | .append-3 { padding-right: 120px;} 103 | .append-4 { padding-right: 160px;} 104 | .append-5 { padding-right: 200px;} 105 | .append-6 { padding-right: 240px;} 106 | .append-7 { padding-right: 280px;} 107 | .append-8 { padding-right: 320px;} 108 | .append-9 { padding-right: 360px;} 109 | .append-10 { padding-right: 400px;} 110 | .append-11 { padding-right: 440px;} 111 | .append-12 { padding-right: 480px;} 112 | .append-13 { padding-right: 520px;} 113 | .append-14 { padding-right: 560px;} 114 | .append-15 { padding-right: 600px;} 115 | .append-16 { padding-right: 640px;} 116 | .append-17 { padding-right: 680px;} 117 | .append-18 { padding-right: 720px;} 118 | .append-19 { padding-right: 760px;} 119 | .append-20 { padding-right: 800px;} 120 | .append-21 { padding-right: 840px;} 121 | .append-22 { padding-right: 880px;} 122 | .append-23 { padding-right: 920px;} 123 | 124 | /* Add these to a column to prepend empty cols. */ 125 | 126 | .prepend-1 { padding-left: 40px;} 127 | .prepend-2 { padding-left: 80px;} 128 | .prepend-3 { padding-left: 120px;} 129 | .prepend-4 { padding-left: 160px;} 130 | .prepend-5 { padding-left: 200px;} 131 | .prepend-6 { padding-left: 240px;} 132 | .prepend-7 { padding-left: 280px;} 133 | .prepend-8 { padding-left: 320px;} 134 | .prepend-9 { padding-left: 360px;} 135 | .prepend-10 { padding-left: 400px;} 136 | .prepend-11 { padding-left: 440px;} 137 | .prepend-12 { padding-left: 480px;} 138 | .prepend-13 { padding-left: 520px;} 139 | .prepend-14 { padding-left: 560px;} 140 | .prepend-15 { padding-left: 600px;} 141 | .prepend-16 { padding-left: 640px;} 142 | .prepend-17 { padding-left: 680px;} 143 | .prepend-18 { padding-left: 720px;} 144 | .prepend-19 { padding-left: 760px;} 145 | .prepend-20 { padding-left: 800px;} 146 | .prepend-21 { padding-left: 840px;} 147 | .prepend-22 { padding-left: 880px;} 148 | .prepend-23 { padding-left: 920px;} 149 | 150 | 151 | /* Border on right hand side of a column. */ 152 | .border { 153 | padding-right: 4px; 154 | margin-right: 5px; 155 | border-right: 1px solid #ddd; 156 | } 157 | 158 | /* Border with more whitespace, spans one column. */ 159 | .colborder { 160 | padding-right: 24px; 161 | margin-right: 25px; 162 | border-right: 1px solid #ddd; 163 | } 164 | 165 | 166 | /* Use these classes on an element to push it into the 167 | next column, or to pull it into the previous column. */ 168 | 169 | 170 | .pull-1 { margin-left: -40px; } 171 | .pull-2 { margin-left: -80px; } 172 | .pull-3 { margin-left: -120px; } 173 | .pull-4 { margin-left: -160px; } 174 | .pull-5 { margin-left: -200px; } 175 | .pull-6 { margin-left: -240px; } 176 | .pull-7 { margin-left: -280px; } 177 | .pull-8 { margin-left: -320px; } 178 | .pull-9 { margin-left: -360px; } 179 | .pull-10 { margin-left: -400px; } 180 | .pull-11 { margin-left: -440px; } 181 | .pull-12 { margin-left: -480px; } 182 | .pull-13 { margin-left: -520px; } 183 | .pull-14 { margin-left: -560px; } 184 | .pull-15 { margin-left: -600px; } 185 | .pull-16 { margin-left: -640px; } 186 | .pull-17 { margin-left: -680px; } 187 | .pull-18 { margin-left: -720px; } 188 | .pull-19 { margin-left: -760px; } 189 | .pull-20 { margin-left: -800px; } 190 | .pull-21 { margin-left: -840px; } 191 | .pull-22 { margin-left: -880px; } 192 | .pull-23 { margin-left: -920px; } 193 | .pull-24 { margin-left: -960px; } 194 | 195 | .pull-1, .pull-2, .pull-3, .pull-4, .pull-5, .pull-6, .pull-7, .pull-8, .pull-9, .pull-10, .pull-11, .pull-12, .pull-13, .pull-14, .pull-15, .pull-16, .pull-17, .pull-18, .pull-19, .pull-20, .pull-21, .pull-22, .pull-23, .pull-24 {float: left; position:relative;} 196 | 197 | 198 | .push-1 { margin: 0 -40px 1.5em 40px; } 199 | .push-2 { margin: 0 -80px 1.5em 80px; } 200 | .push-3 { margin: 0 -120px 1.5em 120px; } 201 | .push-4 { margin: 0 -160px 1.5em 160px; } 202 | .push-5 { margin: 0 -200px 1.5em 200px; } 203 | .push-6 { margin: 0 -240px 1.5em 240px; } 204 | .push-7 { margin: 0 -280px 1.5em 280px; } 205 | .push-8 { margin: 0 -320px 1.5em 320px; } 206 | .push-9 { margin: 0 -360px 1.5em 360px; } 207 | .push-10 { margin: 0 -400px 1.5em 400px; } 208 | .push-11 { margin: 0 -440px 1.5em 440px; } 209 | .push-12 { margin: 0 -480px 1.5em 480px; } 210 | .push-13 { margin: 0 -520px 1.5em 520px; } 211 | .push-14 { margin: 0 -560px 1.5em 560px; } 212 | .push-15 { margin: 0 -600px 1.5em 600px; } 213 | .push-16 { margin: 0 -640px 1.5em 640px; } 214 | .push-17 { margin: 0 -680px 1.5em 680px; } 215 | .push-18 { margin: 0 -720px 1.5em 720px; } 216 | .push-19 { margin: 0 -760px 1.5em 760px; } 217 | .push-20 { margin: 0 -800px 1.5em 800px; } 218 | .push-21 { margin: 0 -840px 1.5em 840px; } 219 | .push-22 { margin: 0 -880px 1.5em 880px; } 220 | .push-23 { margin: 0 -920px 1.5em 920px; } 221 | .push-24 { margin: 0 -960px 1.5em 960px; } 222 | 223 | .push-1, .push-2, .push-3, .push-4, .push-5, .push-6, .push-7, .push-8, .push-9, .push-10, .push-11, .push-12, .push-13, .push-14, .push-15, .push-16, .push-17, .push-18, .push-19, .push-20, .push-21, .push-22, .push-23, .push-24 {float: right; position:relative;} 224 | 225 | 226 | /* Misc classes and elements 227 | -------------------------------------------------------------- */ 228 | 229 | /* In case you need to add a gutter above/below an element */ 230 | div.prepend-top, .prepend-top { 231 | margin-top:1.5em; 232 | } 233 | div.append-bottom, .append-bottom { 234 | margin-bottom:1.5em; 235 | } 236 | 237 | /* Use a .box to create a padded box inside a column. */ 238 | .box { 239 | padding: 1.5em; 240 | margin-bottom: 1.5em; 241 | background: #E5ECF9; 242 | } 243 | 244 | /* Use this to create a horizontal ruler across a column. */ 245 | hr { 246 | background: #ddd; 247 | color: #ddd; 248 | clear: both; 249 | float: none; 250 | width: 100%; 251 | height: 1px; 252 | margin: 0 0 1.45em; 253 | border: none; 254 | } 255 | 256 | hr.space { 257 | background: #fff; 258 | color: #fff; 259 | visibility: hidden; 260 | } 261 | 262 | 263 | /* Clearing floats without extra markup 264 | Based on How To Clear Floats Without Structural Markup by PiE 265 | [http://www.positioniseverything.net/easyclearing.html] */ 266 | 267 | .clearfix:after, .container:after { 268 | content: "\0020"; 269 | display: block; 270 | height: 0; 271 | clear: both; 272 | visibility: hidden; 273 | overflow:hidden; 274 | } 275 | .clearfix, .container {display: block;} 276 | 277 | /* Regular clearing 278 | apply to column that should drop below previous ones. */ 279 | 280 | .clear { clear:both; } 281 | -------------------------------------------------------------------------------- /simsearch/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # models.py 4 | # simsearch 5 | # 6 | # Created by Lars Yencken on 28-08-2010. 7 | # Copyright 2010 Lars Yencken. All rights reserved. 8 | # 9 | 10 | """ 11 | Database models for similarity search. 12 | """ 13 | 14 | import os 15 | import codecs 16 | import gzip 17 | import itertools 18 | 19 | import mongoengine 20 | from cjktools import scripts 21 | from cjktools.resources import kanjidic 22 | from nltk.probability import FreqDist, LaplaceProbDist 23 | 24 | import stroke 25 | import heap_cache 26 | 27 | from simsearch import settings 28 | 29 | class Similarity(mongoengine.Document): 30 | "Raw similarity scores for kanji pairs." 31 | kanji_pair = mongoengine.StringField(max_length=2, primary_key=True) 32 | similarity = mongoengine.FloatField(min_value=0.0, max_value=1.0, 33 | required=True) 34 | 35 | def partner_to(self, kanji): 36 | "Returns the partnering kanji in this pair." 37 | if kanji not in self.kanji_pair: 38 | raise ValueError('kanji not part of this pair') 39 | 40 | return self.kanji_pair.replace(kanji, '') 41 | 42 | @classmethod 43 | def build(cls): 44 | print 'Building similarity matrix' 45 | cls.drop_collection() 46 | sed = stroke.StrokeEditDistance() 47 | kanji_set = _get_kanji() 48 | 49 | cache = heap_cache.FixedSimilarityCache(settings.N_NEIGHBOURS_STORED) 50 | pairs = ((a, b) for (a, b) in itertools.product(kanji_set, kanji_set) 51 | if (a < b)) 52 | 53 | for kanji_a, kanji_b in pairs: 54 | distance = sed(kanji_a, kanji_b) 55 | cache.add(kanji_a, kanji_b, 1 - distance) 56 | 57 | for kanji in kanji_set: 58 | heap = cache.get_heap(kanji) 59 | for similarity, neighbour in heap.get_contents(): 60 | kanji_pair = ''.join(min( 61 | (kanji, neighbour), (neighbour, kanji) 62 | )) 63 | doc = Similarity( 64 | kanji_pair=kanji_pair, 65 | similarity=similarity, 66 | ) 67 | doc.save() 68 | 69 | return cache 70 | 71 | @classmethod 72 | def load(cls, n=None): 73 | if n is None: 74 | n = settings.N_NEIGHBOURS_STORED 75 | cache = heap_cache.FixedSimilarityCache(n) 76 | for record in cls.objects: 77 | kanji_pair = record.kanji_pair 78 | cache.add(kanji_pair[0], kanji_pair[1], record.similarity) 79 | 80 | return cache 81 | 82 | def __unicode__(self): 83 | return u'(%s, %s, %f)' % (self.kanji_pair[0], self.kanji_pair[1], 84 | self.similarity) 85 | 86 | class Neighbour(mongoengine.EmbeddedDocument): 87 | "A weighted graph edge." 88 | kanji = mongoengine.StringField(max_length=1) 89 | weight = mongoengine.FloatField(min_value=0.0) 90 | 91 | def __cmp__(self, rhs): 92 | return cmp(self.weight, rhs.weight) 93 | 94 | def __unicode__(self): 95 | return self.kanji 96 | 97 | class Node(mongoengine.Document): 98 | """ 99 | A single node in the state graph for Q-learning. The neighbours attribute 100 | stores Q(n, a) for all actions which can be taken from this node. 101 | """ 102 | pivot = mongoengine.StringField(max_length=1, primary_key=True) 103 | neighbours = mongoengine.ListField(mongoengine.EmbeddedDocumentField( 104 | Neighbour)) 105 | n_updates = mongoengine.IntField(default=0, min_value=0) 106 | 107 | def at(self, kanji): 108 | "Gets the neighbour described by the given kanji." 109 | for neighbour in self.neighbours: 110 | if neighbour.kanji == kanji: 111 | return neighbour 112 | 113 | raise KeyError(kanji) 114 | 115 | @classmethod 116 | def build(cls, cache=None): 117 | "Builds the initial graph for Q learning." 118 | print 'Building neighbourhood graph' 119 | n = settings.N_NEIGHBOURS_RECALLED 120 | 121 | if cache is None: 122 | cache = Similarity.load(n) 123 | 124 | cls.drop_collection() 125 | dist = cls._load_corpus_counts() 126 | for kanji in _get_kanji(): 127 | node = Node(pivot=kanji, neighbours=[]) 128 | 129 | weights = {} 130 | best_n = sorted(cache.get_heap(kanji).get_contents(), 131 | reverse=True)[:n] 132 | for weight, partner in best_n: 133 | weights[partner] = weight * dist.prob(partner) 134 | total_weights = sum(weights.itervalues()) 135 | 136 | for partner, weight in sorted(weights.iteritems(), 137 | key=lambda p: p[1], reverse=True): 138 | node.neighbours.append(Neighbour(kanji=partner, 139 | weight=weight / total_weights)) 140 | 141 | node.save() 142 | 143 | @classmethod 144 | def _load_corpus_counts(cls): 145 | input_file = os.path.join(settings.DATA_DIR, 146 | 'corpus', 'jp_char_corpus_counts.gz') 147 | freq_dist = FreqDist() 148 | with open(input_file, 'r') as istream: 149 | istream = gzip.GzipFile(fileobj=istream) 150 | istream = codecs.getreader('utf8')(istream) 151 | for line in istream: 152 | kanji, count = line.split() 153 | freq_dist.inc(kanji, count=int(count)) 154 | 155 | return LaplaceProbDist(freq_dist) 156 | 157 | @classmethod 158 | def get_coverage(cls): 159 | "Returns the set of kanji for which neighbours are stored." 160 | db = cls.objects._collection 161 | return set(r['_id'] for r in db.find({}, fields=['_id'])) 162 | 163 | @classmethod 164 | def update(cls, path): 165 | path = cls._remove_cycles(path) 166 | nodes = cls.objects.filter(pivot__in=list(path)) 167 | if len(nodes) != len(path): 168 | found_pivots = set(n.pivot for n in nodes) 169 | missing_kanji = [k for k in path if k not in found_pivots][0] 170 | raise ValueError('node not found in database for kanji %s' % \ 171 | missing_kanji.encode('utf8')) 172 | 173 | # cache Q(s, a) for the subgraph we're interested in 174 | q = cls._cache_subgraph(nodes) 175 | 176 | # Calculate Q'(s, a) in reverse order along the path 177 | # Q'(s, a) = (1 - A(s))Q(s, a) + A(s)*(r(a) + G * max_a Q(s', a)) 178 | gamma = settings.UPDATE_GAMMA 179 | for i in xrange(len(path) - 2, -1, -1): 180 | s = path[i] 181 | q_s = q[s] 182 | alpha = 1.0 / (4.0 + 0.5 * q_s.n_updates) 183 | 184 | # update very action available from state s 185 | for a in sorted(q_s.neighbours, key=lambda n: n.weight, 186 | reverse=True): 187 | q_old = a.weight 188 | 189 | r_a = (1 if a.kanji == path[-1] else 0) 190 | q_opt = r_a + gamma * max(q[a.kanji].neighbours).weight 191 | 192 | a.weight = (1.0 - alpha) * q_old + alpha * q_opt 193 | 194 | q_s.n_updates += 1 195 | q_s.save() 196 | 197 | @classmethod 198 | def _cache_subgraph(cls, nodes): 199 | q = {} 200 | missing_neighbours = set() 201 | for node in nodes: 202 | q[node.pivot] = node 203 | missing_neighbours.update(n.kanji for n in node.neighbours) 204 | missing_neighbours.discard(node.pivot for node in nodes) 205 | 206 | extra_nodes = cls.objects.filter(pivot__in=missing_neighbours) 207 | if len(extra_nodes) != len(missing_neighbours): 208 | raise ValueError('cannot cache subgraph -- neighbours missing') 209 | 210 | for node in extra_nodes: 211 | q[node.pivot] = node 212 | 213 | return q 214 | 215 | @staticmethod 216 | def _remove_cycles(path): 217 | # an O(n^2) method is good enough 218 | i = 0 219 | while i < len(path): 220 | kanji = path[i] 221 | j = path.find(kanji, i + 1) 222 | if j >= 0: 223 | # a cycle! trim the path to skip it 224 | path = path[:i] + path[j:] 225 | else: 226 | i += 1 227 | 228 | return path 229 | 230 | def __unicode__(self): 231 | return self.pivot 232 | 233 | class Trace(mongoengine.Document): 234 | "A search path through the graph, as taken by a user." 235 | ip_address = mongoengine.StringField(max_length=15) 236 | path = mongoengine.ListField(mongoengine.StringField(max_length=1)) 237 | 238 | @classmethod 239 | def log(cls, request, path): 240 | ip = request.remote_addr 241 | cls(ip_address=ip, path=list(path)).save() 242 | 243 | class Translation(mongoengine.Document): 244 | "A per-kanji dictionary entry of readings and translations." 245 | kanji = mongoengine.StringField(max_length=1, primary_key=True) 246 | on_readings = mongoengine.ListField(mongoengine.StringField()) 247 | kun_readings = mongoengine.ListField(mongoengine.StringField()) 248 | glosses = mongoengine.ListField(mongoengine.StringField()) 249 | 250 | @classmethod 251 | def build(cls): 252 | cls.drop_collection() 253 | kjd = kanjidic.Kanjidic() 254 | for entry in kjd.itervalues(): 255 | translation = cls( 256 | kanji=entry.kanji, 257 | on_readings=entry.on_readings, 258 | kun_readings=entry.kun_readings, 259 | glosses = entry.gloss, 260 | ) 261 | translation.save() 262 | 263 | def build(): 264 | "Builds the database." 265 | cache = Similarity.build() 266 | Node.build(cache) 267 | Translation.build() 268 | 269 | #----------------------------------------------------------------------------# 270 | 271 | def _get_kanji(): 272 | "Fetches our canonical list of kanji to work with." 273 | if not hasattr(_get_kanji, '_cached'): 274 | kanji_set = set() 275 | with codecs.open(settings.STROKE_SOURCE, 'r', 'utf8') as istream: 276 | for line in istream: 277 | kanji, rest = line.split() 278 | 279 | # check for a kanji or hanzi; our Chinese data extends into 280 | # the E000-F8FF private use block, so an "Unknown" script is 281 | # ok too 282 | assert len(kanji) == 1 and scripts.script_type(kanji) in \ 283 | (scripts.Script.Kanji, scripts.Script.Unknown) 284 | 285 | kanji_set.add(kanji) 286 | 287 | _get_kanji._cached = kanji_set 288 | 289 | return _get_kanji._cached 290 | 291 | #----------------------------------------------------------------------------# 292 | 293 | if __name__ == '__main__': 294 | build() 295 | 296 | # vim: ts=4 sw=4 sts=4 et tw=78: 297 | -------------------------------------------------------------------------------- /simsearch/static/css/blueprint/screen.css: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------------- 2 | 3 | 4 | Blueprint CSS Framework 0.9 5 | http://blueprintcss.org 6 | 7 | * Copyright (c) 2007-Present. See LICENSE for more info. 8 | * See README for instructions on how to use Blueprint. 9 | * For credits and origins, see AUTHORS. 10 | * This is a compressed file. See the sources in the 'src' directory. 11 | 12 | ----------------------------------------------------------------------- */ 13 | 14 | /* reset.css */ 15 | html, body, div, span, object, iframe, h1, h2, h3, h4, h5, h6, p, blockquote, pre, a, abbr, acronym, address, code, del, dfn, em, img, q, dl, dt, dd, ol, ul, li, fieldset, form, label, legend, table, caption, tbody, tfoot, thead, tr, th, td, article, aside, dialog, figure, footer, header, hgroup, nav, section {margin:0;padding:0;border:0;font-weight:inherit;font-style:inherit;font-size:100%;font-family:inherit;vertical-align:baseline;} 16 | article, aside, dialog, figure, footer, header, hgroup, nav, section {display:block;} 17 | body {line-height:1.5;} 18 | table {border-collapse:separate;border-spacing:0;} 19 | caption, th, td {text-align:left;font-weight:normal;} 20 | table, td, th {vertical-align:middle;} 21 | blockquote:before, blockquote:after, q:before, q:after {content:"";} 22 | blockquote, q {quotes:"" "";} 23 | a img {border:none;} 24 | 25 | /* typography.css */ 26 | html {font-size:100.01%;} 27 | body {font-size:75%;color:#222;background:#fff;font-family:"Helvetica Neue", Arial, Helvetica, sans-serif;} 28 | h1, h2, h3, h4, h5, h6 {font-weight:normal;color:#111;} 29 | h1 {font-size:3em;line-height:1;margin-bottom:0.5em;} 30 | h2 {font-size:2em;margin-bottom:0.75em;} 31 | h3 {font-size:1.5em;line-height:1;margin-bottom:1em;} 32 | h4 {font-size:1.2em;line-height:1.25;margin-bottom:1.25em;} 33 | h5 {font-size:1em;font-weight:bold;margin-bottom:1.5em;} 34 | h6 {font-size:1em;font-weight:bold;} 35 | h1 img, h2 img, h3 img, h4 img, h5 img, h6 img {margin:0;} 36 | p {margin:0 0 1.5em;} 37 | p img.left {float:left;margin:1.5em 1.5em 1.5em 0;padding:0;} 38 | p img.right {float:right;margin:1.5em 0 1.5em 1.5em;} 39 | a:focus, a:hover {color:#08d;} 40 | a {color:#009;text-decoration:underline;} 41 | blockquote {margin:1.5em;color:#666;font-style:italic;} 42 | strong {font-weight:bold;} 43 | em, dfn {font-style:italic;} 44 | dfn {font-weight:bold;} 45 | sup, sub {line-height:0;} 46 | abbr, acronym {border-bottom:1px dotted #666;} 47 | address {margin:0 0 1.5em;font-style:italic;} 48 | del {color:#666;} 49 | pre {margin:1.5em 0;white-space:pre;} 50 | pre, code, tt {font:1em 'andale mono', 'lucida console', monospace;line-height:1.5;} 51 | li ul, li ol {margin:0;} 52 | ul, ol {margin:0 1.5em 1.5em 0;padding-left:3.333em;} 53 | ul {list-style-type:disc;} 54 | ol {list-style-type:decimal;} 55 | dl {margin:0 0 1.5em 0;} 56 | dl dt {font-weight:bold;} 57 | dd {margin-left:1.5em;} 58 | table {margin-bottom:1.4em;width:100%;} 59 | th {font-weight:bold;} 60 | thead th {background:#c3d9ff;} 61 | th, td, caption {padding:4px 10px 4px 5px;} 62 | tr.even td {background:#e5ecf9;} 63 | tfoot {font-style:italic;} 64 | caption {background:#eee;} 65 | .small {font-size:.8em;margin-bottom:1.875em;line-height:1.875em;} 66 | .large {font-size:1.2em;line-height:2.5em;margin-bottom:1.25em;} 67 | .hide {display:none;} 68 | .quiet {color:#666;} 69 | .loud {color:#000;} 70 | .highlight {background:#ff0;} 71 | .added {background:#060;color:#fff;} 72 | .removed {background:#900;color:#fff;} 73 | .first {margin-left:0;padding-left:0;} 74 | .last {margin-right:0;padding-right:0;} 75 | .top {margin-top:0;padding-top:0;} 76 | .bottom {margin-bottom:0;padding-bottom:0;} 77 | 78 | /* forms.css */ 79 | label {font-weight:bold;} 80 | fieldset {padding:1.4em;margin:0 0 1.5em 0;border:1px solid #ccc;} 81 | legend {font-weight:bold;font-size:1.2em;} 82 | input[type=text], input[type=password], input.text, input.title, textarea, select {background-color:#fff;border:1px solid #bbb;} 83 | input[type=text]:focus, input[type=password]:focus, input.text:focus, input.title:focus, textarea:focus, select:focus {border-color:#666;} 84 | input[type=text], input[type=password], input.text, input.title, textarea, select {margin:0.5em 0;} 85 | input.text, input.title {width:300px;padding:5px;} 86 | input.title {font-size:1.5em;} 87 | textarea {width:390px;height:250px;padding:5px;} 88 | input[type=checkbox], input[type=radio], input.checkbox, input.radio {position:relative;top:.25em;} 89 | form.inline {line-height:3;} 90 | form.inline p {margin-bottom:0;} 91 | .error, .notice, .success, .info {padding:0.8em;margin-bottom:1em;border:2px solid #ddd;} 92 | .error {background:#fbe3e4;color:#8a1f11;border-color:#fbc2c4;} 93 | .notice {background:#fff6bf;color:#514721;border-color:#ffd324;} 94 | .success {background:#e6efc2;color:#264409;border-color:#c6d880;} 95 | .info {background:#d5edf8;color:#205791;border-color:#92cae4;} 96 | .error a {color:#8a1f11;} 97 | .notice a {color:#514721;} 98 | .success a {color:#264409;} 99 | .info a {color:#205791;} 100 | 101 | /* grid.css */ 102 | .container {width:950px;margin:0 auto;} 103 | .showgrid {background:url(src/grid.png);} 104 | .column, .span-1, .span-2, .span-3, .span-4, .span-5, .span-6, .span-7, .span-8, .span-9, .span-10, .span-11, .span-12, .span-13, .span-14, .span-15, .span-16, .span-17, .span-18, .span-19, .span-20, .span-21, .span-22, .span-23, .span-24 {float:left;margin-right:10px;} 105 | .last {margin-right:0;} 106 | .span-1 {width:30px;} 107 | .span-2 {width:70px;} 108 | .span-3 {width:110px;} 109 | .span-4 {width:150px;} 110 | .span-5 {width:190px;} 111 | .span-6 {width:230px;} 112 | .span-7 {width:270px;} 113 | .span-8 {width:310px;} 114 | .span-9 {width:350px;} 115 | .span-10 {width:390px;} 116 | .span-11 {width:430px;} 117 | .span-12 {width:470px;} 118 | .span-13 {width:510px;} 119 | .span-14 {width:550px;} 120 | .span-15 {width:590px;} 121 | .span-16 {width:630px;} 122 | .span-17 {width:670px;} 123 | .span-18 {width:710px;} 124 | .span-19 {width:750px;} 125 | .span-20 {width:790px;} 126 | .span-21 {width:830px;} 127 | .span-22 {width:870px;} 128 | .span-23 {width:910px;} 129 | .span-24 {width:950px;margin-right:0;} 130 | input.span-1, textarea.span-1, input.span-2, textarea.span-2, input.span-3, textarea.span-3, input.span-4, textarea.span-4, input.span-5, textarea.span-5, input.span-6, textarea.span-6, input.span-7, textarea.span-7, input.span-8, textarea.span-8, input.span-9, textarea.span-9, input.span-10, textarea.span-10, input.span-11, textarea.span-11, input.span-12, textarea.span-12, input.span-13, textarea.span-13, input.span-14, textarea.span-14, input.span-15, textarea.span-15, input.span-16, textarea.span-16, input.span-17, textarea.span-17, input.span-18, textarea.span-18, input.span-19, textarea.span-19, input.span-20, textarea.span-20, input.span-21, textarea.span-21, input.span-22, textarea.span-22, input.span-23, textarea.span-23, input.span-24, textarea.span-24 {border-left-width:1px;border-right-width:1px;padding-left:5px;padding-right:5px;} 131 | input.span-1, textarea.span-1 {width:18px;} 132 | input.span-2, textarea.span-2 {width:58px;} 133 | input.span-3, textarea.span-3 {width:98px;} 134 | input.span-4, textarea.span-4 {width:138px;} 135 | input.span-5, textarea.span-5 {width:178px;} 136 | input.span-6, textarea.span-6 {width:218px;} 137 | input.span-7, textarea.span-7 {width:258px;} 138 | input.span-8, textarea.span-8 {width:298px;} 139 | input.span-9, textarea.span-9 {width:338px;} 140 | input.span-10, textarea.span-10 {width:378px;} 141 | input.span-11, textarea.span-11 {width:418px;} 142 | input.span-12, textarea.span-12 {width:458px;} 143 | input.span-13, textarea.span-13 {width:498px;} 144 | input.span-14, textarea.span-14 {width:538px;} 145 | input.span-15, textarea.span-15 {width:578px;} 146 | input.span-16, textarea.span-16 {width:618px;} 147 | input.span-17, textarea.span-17 {width:658px;} 148 | input.span-18, textarea.span-18 {width:698px;} 149 | input.span-19, textarea.span-19 {width:738px;} 150 | input.span-20, textarea.span-20 {width:778px;} 151 | input.span-21, textarea.span-21 {width:818px;} 152 | input.span-22, textarea.span-22 {width:858px;} 153 | input.span-23, textarea.span-23 {width:898px;} 154 | input.span-24, textarea.span-24 {width:938px;} 155 | .append-1 {padding-right:40px;} 156 | .append-2 {padding-right:80px;} 157 | .append-3 {padding-right:120px;} 158 | .append-4 {padding-right:160px;} 159 | .append-5 {padding-right:200px;} 160 | .append-6 {padding-right:240px;} 161 | .append-7 {padding-right:280px;} 162 | .append-8 {padding-right:320px;} 163 | .append-9 {padding-right:360px;} 164 | .append-10 {padding-right:400px;} 165 | .append-11 {padding-right:440px;} 166 | .append-12 {padding-right:480px;} 167 | .append-13 {padding-right:520px;} 168 | .append-14 {padding-right:560px;} 169 | .append-15 {padding-right:600px;} 170 | .append-16 {padding-right:640px;} 171 | .append-17 {padding-right:680px;} 172 | .append-18 {padding-right:720px;} 173 | .append-19 {padding-right:760px;} 174 | .append-20 {padding-right:800px;} 175 | .append-21 {padding-right:840px;} 176 | .append-22 {padding-right:880px;} 177 | .append-23 {padding-right:920px;} 178 | .prepend-1 {padding-left:40px;} 179 | .prepend-2 {padding-left:80px;} 180 | .prepend-3 {padding-left:120px;} 181 | .prepend-4 {padding-left:160px;} 182 | .prepend-5 {padding-left:200px;} 183 | .prepend-6 {padding-left:240px;} 184 | .prepend-7 {padding-left:280px;} 185 | .prepend-8 {padding-left:320px;} 186 | .prepend-9 {padding-left:360px;} 187 | .prepend-10 {padding-left:400px;} 188 | .prepend-11 {padding-left:440px;} 189 | .prepend-12 {padding-left:480px;} 190 | .prepend-13 {padding-left:520px;} 191 | .prepend-14 {padding-left:560px;} 192 | .prepend-15 {padding-left:600px;} 193 | .prepend-16 {padding-left:640px;} 194 | .prepend-17 {padding-left:680px;} 195 | .prepend-18 {padding-left:720px;} 196 | .prepend-19 {padding-left:760px;} 197 | .prepend-20 {padding-left:800px;} 198 | .prepend-21 {padding-left:840px;} 199 | .prepend-22 {padding-left:880px;} 200 | .prepend-23 {padding-left:920px;} 201 | .border {padding-right:4px;margin-right:5px;border-right:1px solid #ddd;} 202 | .colborder {padding-right:24px;margin-right:25px;border-right:1px solid #ddd;} 203 | .pull-1 {margin-left:-40px;} 204 | .pull-2 {margin-left:-80px;} 205 | .pull-3 {margin-left:-120px;} 206 | .pull-4 {margin-left:-160px;} 207 | .pull-5 {margin-left:-200px;} 208 | .pull-6 {margin-left:-240px;} 209 | .pull-7 {margin-left:-280px;} 210 | .pull-8 {margin-left:-320px;} 211 | .pull-9 {margin-left:-360px;} 212 | .pull-10 {margin-left:-400px;} 213 | .pull-11 {margin-left:-440px;} 214 | .pull-12 {margin-left:-480px;} 215 | .pull-13 {margin-left:-520px;} 216 | .pull-14 {margin-left:-560px;} 217 | .pull-15 {margin-left:-600px;} 218 | .pull-16 {margin-left:-640px;} 219 | .pull-17 {margin-left:-680px;} 220 | .pull-18 {margin-left:-720px;} 221 | .pull-19 {margin-left:-760px;} 222 | .pull-20 {margin-left:-800px;} 223 | .pull-21 {margin-left:-840px;} 224 | .pull-22 {margin-left:-880px;} 225 | .pull-23 {margin-left:-920px;} 226 | .pull-24 {margin-left:-960px;} 227 | .pull-1, .pull-2, .pull-3, .pull-4, .pull-5, .pull-6, .pull-7, .pull-8, .pull-9, .pull-10, .pull-11, .pull-12, .pull-13, .pull-14, .pull-15, .pull-16, .pull-17, .pull-18, .pull-19, .pull-20, .pull-21, .pull-22, .pull-23, .pull-24 {float:left;position:relative;} 228 | .push-1 {margin:0 -40px 1.5em 40px;} 229 | .push-2 {margin:0 -80px 1.5em 80px;} 230 | .push-3 {margin:0 -120px 1.5em 120px;} 231 | .push-4 {margin:0 -160px 1.5em 160px;} 232 | .push-5 {margin:0 -200px 1.5em 200px;} 233 | .push-6 {margin:0 -240px 1.5em 240px;} 234 | .push-7 {margin:0 -280px 1.5em 280px;} 235 | .push-8 {margin:0 -320px 1.5em 320px;} 236 | .push-9 {margin:0 -360px 1.5em 360px;} 237 | .push-10 {margin:0 -400px 1.5em 400px;} 238 | .push-11 {margin:0 -440px 1.5em 440px;} 239 | .push-12 {margin:0 -480px 1.5em 480px;} 240 | .push-13 {margin:0 -520px 1.5em 520px;} 241 | .push-14 {margin:0 -560px 1.5em 560px;} 242 | .push-15 {margin:0 -600px 1.5em 600px;} 243 | .push-16 {margin:0 -640px 1.5em 640px;} 244 | .push-17 {margin:0 -680px 1.5em 680px;} 245 | .push-18 {margin:0 -720px 1.5em 720px;} 246 | .push-19 {margin:0 -760px 1.5em 760px;} 247 | .push-20 {margin:0 -800px 1.5em 800px;} 248 | .push-21 {margin:0 -840px 1.5em 840px;} 249 | .push-22 {margin:0 -880px 1.5em 880px;} 250 | .push-23 {margin:0 -920px 1.5em 920px;} 251 | .push-24 {margin:0 -960px 1.5em 960px;} 252 | .push-1, .push-2, .push-3, .push-4, .push-5, .push-6, .push-7, .push-8, .push-9, .push-10, .push-11, .push-12, .push-13, .push-14, .push-15, .push-16, .push-17, .push-18, .push-19, .push-20, .push-21, .push-22, .push-23, .push-24 {float:right;position:relative;} 253 | div.prepend-top, .prepend-top {margin-top:1.5em;} 254 | div.append-bottom, .append-bottom {margin-bottom:1.5em;} 255 | .box {padding:1.5em;margin-bottom:1.5em;background:#E5ECF9;} 256 | hr {background:#ddd;color:#ddd;clear:both;float:none;width:100%;height:1px;margin:0 0 1.45em;border:none;} 257 | hr.space {background:#fff;color:#fff;visibility:hidden;} 258 | .clearfix:after, .container:after {content:"\0020";display:block;height:0;clear:both;visibility:hidden;overflow:hidden;} 259 | .clearfix, .container {display:block;} 260 | .clear {clear:both;} -------------------------------------------------------------------------------- /simsearch/static/js/search.js: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * search.py 4 | * simsearch 5 | * 6 | * Created by Lars Yencken on 31-08-2010. 7 | * Copyright 2010 Lars Yencken. All rights reserved. 8 | * 9 | */ 10 | 11 | /* 12 | * GLOBALS 13 | */ 14 | 15 | // The fraction of the screen's width or height to use for the lookup pane. 16 | var g_useFraction = 0.8; 17 | 18 | // A history of data, so that we can provide backwards and forwards 19 | // navigation. 20 | var g_historyStore = new Array(); 21 | var currentIndex = null; 22 | 23 | // The current "mode" of the interface. 24 | var g_currentState = 'seeding'; 25 | 26 | // Extra state about what's currently being displayed. Only updated by objects 27 | // that need it on draw() and clean() operations, primarily to avoid excessive 28 | // redraws. 29 | var g_currentObjects = {} 30 | var g_windowDirty = false; 31 | 32 | /* 33 | * drawError() 34 | * Draws an error message to the screen. 35 | */ 36 | function drawError(messageEn, messageJp, timeout) { 37 | // Render it, but still hidden, so we can check its size. 38 | g_currentObjects['errorMessage'] = [messageEn, messageJp, timeout] 39 | setOpacity('errorMessage', 0.01); 40 | showElement('errorMessage'); 41 | 42 | // Delete any previous messages. 43 | var parentNode = $('errorMessage') 44 | while (parentNode.childNodes.length > 0) { 45 | c = parentNode.childNodes[0]; 46 | parentNode.removeChild(c); 47 | delete c; 48 | } 49 | 50 | // Add our messages. 51 | parentNode.appendChild(document.createTextNode(messageEn)) 52 | parentNode.appendChild(document.createElement('br')); 53 | parentNode.appendChild(document.createTextNode(messageJp)) 54 | 55 | var errorSize = getElementDimensions('errorMessage'); 56 | var seedInputSize = getSeedingInputSize(); 57 | var seedInputLoc = getElementPosition(document['seedForm'].seedKanji); 58 | var windowSize = getWindowSize(); 59 | 60 | var errorLoc = new Coordinates(); 61 | errorLoc.x = Math.round(seedInputLoc.x + seedInputSize.w/2 - errorSize.w/2); 62 | errorLoc.y = seedInputLoc.y + seedInputSize.h + 50; 63 | 64 | setElementPosition('errorMessage', errorLoc); 65 | 66 | appear('errorMessage'); 67 | drawSearch(false); 68 | if (timeout > 0) { 69 | callLater(timeout, function(){ 70 | fade('errorMessage'); 71 | delete g_currentObjects["errorMessage"]; 72 | }); 73 | } 74 | } 75 | 76 | /* 77 | * clearError() 78 | * Hides any error messages that are shown. 79 | */ 80 | function clearError() { 81 | hideElement('errorMessage'); 82 | } 83 | 84 | /* 85 | * getSeedingInputSize() 86 | * Returns the size of the combined input element. 87 | */ 88 | function getSeedingInputSize() { 89 | setOpacity('seedLookup', 0.01); 90 | showElement('seedLookup'); 91 | // Get the elements. 92 | var magnifier = document['magnifier']; 93 | var seedKanjiInput = document['seedForm'].seedKanji 94 | 95 | // Determine their sizes. 96 | var inputSize = getElementDimensions(seedKanjiInput); 97 | var magSize = getElementDimensions(magnifier); 98 | 99 | // Calculate the combined size. 100 | var combinedSize = new Dimensions(); 101 | var padding = 5; 102 | combinedSize.w = inputSize.w + padding + magSize.w; 103 | combinedSize.h = magSize.h; 104 | 105 | return combinedSize; 106 | } 107 | 108 | /* 109 | * drawSeedingInput() 110 | * Draw the initial input dialog. 111 | */ 112 | function drawSeedingInput(useAppear) { 113 | drawSearch(useAppear); 114 | if ("errorMessage" in g_currentObjects) { 115 | var em = g_currentObjects["errorMessage"]; 116 | drawError(em[0], em[1], em[2]); 117 | } 118 | } 119 | 120 | function drawSearch(useAppear) { 121 | if (useAppear == null) { 122 | useAppear = true; 123 | } 124 | 125 | // Calculate the coordinates. 126 | var windowSize = getWindowSize(); 127 | var inputLoc = new Coordinates(); 128 | var combinedSize = getSeedingInputSize(); 129 | inputLoc.x = Math.round(0.5*(windowSize.w - combinedSize.w)); 130 | inputLoc.y = Math.round(0.5*(windowSize.h - combinedSize.h)); 131 | 132 | // Center and make the input appear. 133 | setElementPosition('seedLookup', inputLoc); 134 | if (useAppear) { 135 | appear('seedLookup'); 136 | callLater(1, function() { document['seedForm'].seedKanji.focus();}); 137 | } else { 138 | setOpacity('seedLookup', 1.0); 139 | document['seedForm'].seedKanji.focus(); 140 | } 141 | } 142 | 143 | /* 144 | * clearSeedingInput() 145 | * Clear the initial seeding input dialog. 146 | */ 147 | function clearSeedingInput() { 148 | hideElement('seedLookup'); 149 | hideElement('errorMessage'); 150 | return; 151 | } 152 | 153 | /* 154 | * switchState() 155 | * Switches from one UI state to another. 156 | */ 157 | function switchState(newState, stateArg) { 158 | g_clearState[g_currentState](); 159 | g_currentState = newState; 160 | g_initState[newState](stateArg); 161 | } 162 | 163 | /* 164 | * submitSeed() 165 | */ 166 | function submitSeed() { 167 | var value = document['seedForm'].seedKanji.value; 168 | 169 | var messageEn = "Please enter a single kanji only."; 170 | var messageJp = '一つの漢字を入力してください。'; 171 | 172 | if (value.length != 1) { 173 | drawError(messageEn, messageJp, 6); 174 | } else { 175 | // Check that the input is a kanji. 176 | logDebug("Ok value: " + value); 177 | var valueOrd = ord(value); 178 | if (valueOrd < 12353 || valueOrd > 40869) { 179 | drawError(messageEn, messageJp, 6); 180 | } else { 181 | // Valid! 182 | switchState("lookup", value); 183 | } 184 | } 185 | return false; 186 | } 187 | 188 | /* 189 | * initLookup() 190 | * Loads the kanji which are similar to the current pivot. 191 | */ 192 | function initLookup(pivotKanjiVal) { 193 | if (currentIndex != null) { 194 | var previousKanji = g_historyStore[currentIndex].pivotKanji; 195 | var newDoc = loadJSONDoc(g_pivotPath + pivotKanjiVal + "/", {}); 196 | } else { 197 | var newDoc = loadJSONDoc(g_pivotPath + pivotKanjiVal + "/", {}); 198 | } 199 | 200 | var success = function(obj) { 201 | // Store current values, and redraw the screen. 202 | // Determine whether truncation is needed. 203 | if (currentIndex == null) { 204 | // First time. 205 | currentIndex = 0; 206 | } else if (g_historyStore.length > currentIndex + 1) { 207 | // Truncation needed! 208 | currentIndex++; 209 | g_historyStore.length = currentIndex; 210 | } else { 211 | currentIndex++; 212 | } 213 | g_historyStore[currentIndex] = obj; 214 | 215 | drawBorder(); 216 | drawLookup(); 217 | } 218 | 219 | var failure = function(err) { 220 | logDebug("Couldn't load data: " + err); 221 | switchState('seeding', null); 222 | drawError('No data found for the kanji ' + pivotKanjiVal + '.', '', 223 | 6); 224 | } 225 | 226 | newDoc.addCallbacks(success, failure); 227 | }; 228 | 229 | /* 230 | * fullRedraw() 231 | * Redraws the whole screen, using the existing kanji. 232 | */ 233 | function fullRedraw() { 234 | g_windowDirty = true; 235 | g_drawState[g_currentState](); 236 | g_windowDirty = false; 237 | }; 238 | 239 | // Redraw the window if it is resized. 240 | window.onresize = fullRedraw; 241 | 242 | /* 243 | * getLookupPlane() 244 | * Fetches the full plane size of the area. 245 | */ 246 | function getLookupPlane() { 247 | var windowSize = getWindowSize(); 248 | var lookupPlane = {}; 249 | lookupPlane.w = g_useFraction*windowSize.w; 250 | lookupPlane.h = g_useFraction*windowSize.h; 251 | 252 | // Make it square. 253 | if (lookupPlane.w < lookupPlane.h) { 254 | lookupPlane.h = lookupPlane.w; 255 | } else { 256 | lookupPlane.w = lookupPlane.h; 257 | } 258 | 259 | // Make it centered. 260 | lookupPlane.left = Math.round((windowSize.w - lookupPlane.w)/2); 261 | lookupPlane.right = lookupPlane.left + lookupPlane.w; 262 | lookupPlane.top = Math.round((windowSize.h - lookupPlane.h)/2); 263 | lookupPlane.bottom = lookupPlane.top + lookupPlane.h; 264 | 265 | lookupPlane.center = new Coordinates( 266 | lookupPlane.left + lookupPlane.w/2, 267 | lookupPlane.top + lookupPlane.h/2 268 | ); 269 | 270 | return lookupPlane; 271 | }; 272 | 273 | /* 274 | * drawBorder() 275 | * Draws a border around the kanji area. 276 | */ 277 | function drawBorder() { 278 | if (g_currentObjects['border'] && !g_windowDirty) { 279 | return; 280 | } 281 | logDebug('Drawing border'); 282 | // Work out the dimensions. 283 | var lookupPlane = getLookupPlane(); 284 | 285 | // Generate the new border. 286 | var styleString = "position:absolute; "; 287 | styleString += 'left: ' + lookupPlane.left + 'px; ' 288 | styleString += 'right: ' + lookupPlane.right + 'px; ' 289 | styleString += 'top: ' + lookupPlane.top + 'px; ' 290 | styleString += 'bottom: ' + lookupPlane.bottom + 'px; ' 291 | styleString += 'width: ' + lookupPlane.w + 'px; ' 292 | styleString += 'height: ' + lookupPlane.h + 'px; ' 293 | var newBorder = DIV({style: styleString, id:"lookupBorder"}, ""); 294 | 295 | // Swap it for the old border. 296 | swapDOM("lookupBorder", newBorder); 297 | 298 | setOpacity("lookupBorder", 1.0); 299 | g_currentObjects['border'] = true; 300 | }; 301 | 302 | /* 303 | * clearBorder() 304 | * Clears the border around the kanji area. 305 | */ 306 | function clearBorder() { 307 | logDebug('Clearing border'); 308 | setOpacity('lookupBorder', 0.0); 309 | g_currentObjects['border'] = false; 310 | } 311 | 312 | /* 313 | * getControlPlane() 314 | * Determine the placement of the control plane. 315 | */ 316 | function getControlPlane() { 317 | var windowSize = getWindowSize(); 318 | var lookupPlane = getLookupPlane(); 319 | 320 | var controlPlane = {}; 321 | controlPlane.top = lookupPlane.bottom; 322 | controlPlane.bottom = windowSize.h; 323 | controlPlane.left = lookupPlane.left; 324 | controlPlane.right = lookupPlane.right; 325 | controlPlane.w = controlPlane.right - controlPlane.left; 326 | controlPlane.h = controlPlane.bottom - controlPlane.top; 327 | 328 | controlPlane.center = new Coordinates( 329 | controlPlane.left + controlPlane.w/2, 330 | controlPlane.top + controlPlane.h/2 331 | ); 332 | 333 | return controlPlane; 334 | } 335 | 336 | /* 337 | * clearControls() 338 | * Clears the controls from the display. 339 | */ 340 | function clearControls() { 341 | hideElement("backControl"); 342 | hideElement("forwardControl"); 343 | hideElement("resetControl"); 344 | return; 345 | } 346 | 347 | /* 348 | * drawControls() 349 | * Repositions the back and forward links dynamically. 350 | */ 351 | function drawControls() { 352 | // Fetch location and size information needed for calculations. 353 | var controlPlane = getControlPlane(); 354 | 355 | //var backSize = elementDimensions(document["backControl"]); 356 | //var forwardSize = elementDimensions(document["forwardControl"]); 357 | var backSize = new Dimensions(30, 35); 358 | var forwardSize = new Dimensions(30, 35); 359 | var resetSize = new Dimensions(35, 35); 360 | 361 | var top = Math.round(controlPlane.top + controlPlane.h/2 - 35/2); 362 | var backPos = new Coordinates(controlPlane.left, top); 363 | 364 | var forwardPos = new Coordinates(controlPlane.right - backSize.w, top); 365 | 366 | var resetPos = new Coordinates( 367 | Math.round(controlPlane.left + controlPlane.w/2 - resetSize.w/2), 368 | top 369 | ); 370 | 371 | setElementPosition("backControl", backPos); 372 | setElementPosition("forwardControl", forwardPos); 373 | setElementPosition("resetControl", resetPos); 374 | 375 | appear("resetControl"); 376 | 377 | if (hasHistory()) { 378 | appear("backControl"); 379 | } else { 380 | hideElement("backControl"); 381 | } 382 | 383 | if (hasFuture()) { 384 | appear("forwardControl"); 385 | } else { 386 | hideElement("forwardControl"); 387 | } 388 | 389 | g_currentObjects['controls'] = true; 390 | 391 | return; 392 | }; 393 | 394 | /* 395 | * clearKanji() 396 | */ 397 | function clearKanji() { 398 | // Hide the existing elements. 399 | var existingElements = getElementsByTagAndClassName("div", "similarKanji"); 400 | for (var i = 0; i < existingElements.length; i++) { 401 | removeElement(existingElements[i]); 402 | } 403 | setOpacity('pivotKanji', 0.0); 404 | 405 | g_currentObjects['controls'] = false; 406 | } 407 | 408 | /* 409 | * clearLookup() 410 | * Clears any kanji currently displayed. 411 | */ 412 | function clearLookup() { 413 | logDebug('Clearing lookup'); 414 | 415 | clearKanji(); 416 | clearControls(); 417 | clearBorder(); 418 | 419 | // Clear the history too. 420 | g_historyStore.length = 0; 421 | currentIndex = null; 422 | return; 423 | } 424 | 425 | /* 426 | * drawLookup() 427 | * Draws the existing dataset to the screen. 428 | */ 429 | function drawLookup() { 430 | drawBorder(); 431 | drawControls(); 432 | drawKanji(); 433 | } 434 | 435 | /* 436 | * drawKanji() 437 | * Draws the lookup kanji to the window. 438 | */ 439 | function drawKanji() { 440 | logDebug("Drawing kanji"); 441 | clearKanji(); 442 | 443 | // Draw the new pivot. 444 | var lookupPlane = getLookupPlane(); 445 | var pivotLoc = toCornerLoc(lookupPlane.center); 446 | var pivotKanji = g_historyStore[currentIndex].pivot_kanji; 447 | var path = "" 448 | for (var i = 0; i < g_historyStore.length; i++) { 449 | path += g_historyStore[i]['pivot_kanji'] 450 | } 451 | 452 | newPivot = DIV( 453 | { 454 | id: "pivotKanji", 455 | style: locToStyle(pivotLoc) 456 | }, 457 | A( 458 | {href: g_translatePath + pivotKanji + "/?path=" + path}, 459 | pivotKanji 460 | ) 461 | ); 462 | swapDOM("pivotKanji", newPivot); 463 | 464 | var tier1 = g_historyStore[currentIndex].tier1; 465 | var tier2 = g_historyStore[currentIndex].tier2; 466 | var tier3 = g_historyStore[currentIndex].tier3; 467 | 468 | drawTier(tier1, 1); 469 | drawTier(tier2, 2); 470 | drawTier(tier3, 3); 471 | 472 | callLater(0.1, function() { 473 | showClass('tier1Kanji'); 474 | callLater(0.1, function() { 475 | showClass('tier2Kanji'); 476 | callLater(0.1, function() { 477 | showClass('tier3Kanji'); 478 | }); 479 | }); 480 | }); 481 | 482 | return; 483 | } 484 | 485 | /* 486 | * locAdd(locA, locB) 487 | * Addition of coordinates, returns the result. 488 | */ 489 | function locAdd(locA, locB) { 490 | return new Coordinates(locA.x + locB.x, locA.y + locB.y); 491 | } 492 | 493 | /* 494 | * drawTier(kanjiArray, tierNumber) 495 | * Draws the given numbered tier (can be 1, 2 or 3), and spaces out the 496 | * given kanji evenly upon that tier. 497 | */ 498 | function drawTier(kanjiArray, tierNumber) { 499 | logDebug("Drawing tier " + tierNumber); 500 | var lookupPlane = getLookupPlane(); 501 | 502 | var center = lookupPlane.center; 503 | var angleFraction = 2*Math.PI/kanjiArray.length; 504 | var radius = tierNumber*lookupPlane.w/8; 505 | var initialFraction = Math.random()*angleFraction; 506 | 507 | // Generate new elements for the kanji. 508 | var newElements = new Array(); 509 | for (var i = 0; i < kanjiArray.length; i++) { 510 | var kanji = kanjiArray[i]; 511 | var loadCommand = "initLookup('" + kanji + "')"; 512 | 513 | var kanjiAngle = initialFraction + i*angleFraction; 514 | var loc = new Coordinates( 515 | radius*Math.cos(kanjiAngle), 516 | radius*Math.sin(kanjiAngle) 517 | ) 518 | loc = locAdd(loc, lookupPlane.center); 519 | loc = toCornerLoc(loc); 520 | newElements[i] = DIV( 521 | { 522 | class: "tier" + tierNumber + "Kanji similarKanji", 523 | style: "opacity:0.0; " + locToStyle(loc), 524 | }, 525 | A( 526 | { 527 | href: "javascript:;", 528 | onclick: loadCommand 529 | }, 530 | kanji 531 | ) 532 | ); 533 | } 534 | 535 | var body = document.getElementsByTagName("body")[0]; 536 | appendChildNodes(body, newElements); 537 | 538 | return; 539 | }; 540 | 541 | /* 542 | * showClass() 543 | * Make all the DIV elements of this class appear. 544 | */ 545 | function showClass(className) { 546 | newElements = getElementsByTagAndClassName("div", className); 547 | for (var i = 0; i < newElements.length; i++) { 548 | setOpacity(newElements[i], 1.0); 549 | } 550 | } 551 | 552 | /* 553 | * getWindowSize() 554 | * Returns the size in pixels of the browser window as a (width, height) 555 | * array of 2 elements. 556 | */ 557 | function getWindowSize() { 558 | var windowSize = new Dimensions(); 559 | if (navigator.appName == "Netscape") { 560 | windowSize.w = window.innerWidth; 561 | windowSize.h = window.innerHeight; 562 | 563 | } else if (navigator.appName == "Microsoft Internet Explorer") { 564 | windowSize.w = document.body.clientWidth; 565 | windowSize.h = document.body.clientHeight; 566 | } 567 | 568 | return windowSize; 569 | }; 570 | 571 | /* 572 | * toCornerLoc(centerLoc, object=kanji) 573 | * Given the coordinates for the center of the object, returns the 574 | * coordinates for the top-left corner of the object, which is used 575 | * to lay it out. 576 | */ 577 | function toCornerLoc(centerLoc, object) { 578 | var lookupPlane = getLookupPlane(); 579 | 580 | if (object == null) { 581 | var objectSize = getElementDimensions("pivotKanji"); 582 | } else { 583 | var objectSize = getElemendDimensions(object); 584 | } 585 | 586 | var retVal = new Coordinates(); 587 | retVal.x = Math.round(centerLoc.x - objectSize.w/2); 588 | retVal.y = Math.round(centerLoc.y - objectSize.h/2); 589 | 590 | return retVal; 591 | }; 592 | 593 | /* 594 | * hasHistory() 595 | * Returns true if there is a previous lookup state recorded. 596 | */ 597 | function hasHistory() { 598 | return (currentIndex > 0); 599 | } 600 | 601 | /* 602 | * hasFuture() 603 | * Returns true if the user has browsed back from a lookup state. 604 | */ 605 | function hasFuture() { 606 | if (currentIndex == null) { 607 | return false; 608 | } else { 609 | return (currentIndex < g_historyStore.length - 1); 610 | } 611 | } 612 | 613 | /* 614 | * previousPivot() 615 | * Fetches the previous pivot (if one exists) and redraws. 616 | */ 617 | function previousPivot() { 618 | if (hasHistory()) { 619 | currentIndex--; 620 | clearKanji(); 621 | drawKanji(); 622 | drawControls(); 623 | } 624 | }; 625 | 626 | /* 627 | * nextPivot() 628 | * Fetches the next pivot (if one exists) and redraws. 629 | */ 630 | function nextPivot() { 631 | if (hasFuture()) { 632 | currentIndex++; 633 | clearKanji(); 634 | drawKanji(); 635 | drawControls(); 636 | } 637 | } 638 | 639 | /* 640 | * ord() 641 | * Returns the character code of a character. 642 | */ 643 | function ord(char) { 644 | return char.charCodeAt(0); 645 | } 646 | 647 | /* 648 | * chr(charCode) 649 | * Returns the character indicated by the given character code. 650 | */ 651 | function chr(charCode) { 652 | return String.fromCharCode(charCode); 653 | } 654 | 655 | var g_clearState = { 656 | 'seeding': clearSeedingInput, 657 | 'lookup': clearLookup, 658 | } 659 | 660 | var g_drawState = { 661 | 'seeding': drawSeedingInput, 662 | 'lookup': drawLookup, 663 | } 664 | 665 | var g_initState = { 666 | 'seeding': drawSeedingInput, 667 | 'lookup': initLookup, 668 | } 669 | 670 | function initInterface() { 671 | drawSeedingInput(); 672 | callLater(4, function() { 673 | if (emptyInput()) { 674 | drawError('Enter a kanji similar to', 675 | 'to the one you want to find.', 0); 676 | } 677 | }); 678 | } 679 | 680 | function emptyInput() { 681 | return g_currentState == 'seeding' 682 | && document['seedForm'].seedKanji.value == ''; 683 | } 684 | 685 | /* 686 | * Converts coordinates to a style string. 687 | */ 688 | function locToStyle(loc) { 689 | return "position:absolute; left:" + loc.x + "px; top:" + loc.y + "px; "; 690 | } 691 | 692 | // Image rollover function. 693 | function roll(imageName, imageSrc) { 694 | document[imageName].src = imageSrc; 695 | } 696 | 697 | -------------------------------------------------------------------------------- /simsearch/static/js/raphael-min.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Raphael 1.5.2 - JavaScript Vector Library 3 | * 4 | * Copyright (c) 2010 Dmitry Baranovskiy (http://raphaeljs.com) 5 | * Licensed under the MIT (http://raphaeljs.com/license.html) license. 6 | */ 7 | (function(){function a(){if(a.is(arguments[0],G)){var b=arguments[0],d=bV[m](a,b.splice(0,3+a.is(b[0],E))),e=d.set();for(var g=0,h=b[w];g";bg=bf.firstChild;bg.style.behavior="url(#default#VML)";if(!(bg&&typeof bg.adj=="object"))return a.type=null;bf=null}a.svg=!(a.vml=a.type=="VML");j[e]=a[e];k=j[e];a._id=0;a._oid=0;a.fn={};a.is=function(a,b){b=x.call(b);if(b=="finite")return!O[f](+a);return b=="null"&&a===null||b==typeof a||b=="object"&&a===Object(a)||b=="array"&&Array.isArray&&Array.isArray(a)||J.call(a).slice(8,-1).toLowerCase()==b};a.angle=function(b,c,d,e,f,g){{if(f==null){var h=b-d,i=c-e;if(!h&&!i)return 0;return((h<0)*180+y.atan(-i/-h)*180/D+360)%360}return a.angle(b,c,f,g)-a.angle(d,e,f,g)}};a.rad=function(a){return a%360*D/180};a.deg=function(a){return a*180/D%360};a.snapTo=function(b,c,d){d=a.is(d,"finite")?d:10;if(a.is(b,G)){var e=b.length;while(e--)if(B(b[e]-c)<=d)return b[e]}else{b=+b;var f=c%b;if(fb-d)return c-f+b}return c};function bh(){var a=[],b=0;for(;b<32;b++)a[b]=(~(~(y.random()*16)))[H](16);a[12]=4;a[16]=(a[16]&3|8)[H](16);return"r-"+a[v]("")}a.setWindow=function(a){h=a;g=h.document};var bi=function(b){if(a.vml){var c=/^\s+|\s+$/g,d;try{var e=new ActiveXObject("htmlfile");e.write("");e.close();d=e.body}catch(a){d=createPopup().document.body}var f=d.createTextRange();bi=bm(function(a){try{d.style.color=r(a)[Y](c,p);var b=f.queryCommandValue("ForeColor");b=(b&255)<<16|b&65280|(b&16711680)>>>16;return"#"+("000000"+b[H](16)).slice(-6)}catch(a){return"none"}})}else{var h=g.createElement("i");h.title="Raphaël Colour Picker";h.style.display="none";g.body[l](h);bi=bm(function(a){h.style.color=a;return g.defaultView.getComputedStyle(h,p).getPropertyValue("color")})}return bi(b)},bj=function(){return"hsb("+[this.h,this.s,this.b]+")"},bk=function(){return"hsl("+[this.h,this.s,this.l]+")"},bl=function(){return this.hex};a.hsb2rgb=function(b,c,d,e){if(a.is(b,"object")&&"h"in b&&"s"in b&&"b"in b){d=b.b;c=b.s;b=b.h;e=b.o}return a.hsl2rgb(b,c,d/2,e)};a.hsl2rgb=function(b,c,d,e){if(a.is(b,"object")&&"h"in b&&"s"in b&&"l"in b){d=b.l;c=b.s;b=b.h}if(b>1||c>1||d>1){b/=360;c/=100;d/=100}var f={},g=["r","g","b"],h,i,j,k,l,m;if(c){d<0.5?h=d*(1+c):h=d+c-d*c;i=2*d-h;for(var n=0;n<3;n++){j=b+1/3*-(n-1);j<0&&j++;j>1&&j--;j*6<1?f[g[n]]=i+(h-i)*6*j:j*2<1?f[g[n]]=h:j*3<2?f[g[n]]=i+(h-i)*(2/3-j)*6:f[g[n]]=i}}else f={r:d,g:d,b:d};f.r*=255;f.g*=255;f.b*=255;f.hex="#"+(16777216|f.b|f.g<<8|f.r<<16).toString(16).slice(1);a.is(e,"finite")&&(f.opacity=e);f.toString=bl;return f};a.rgb2hsb=function(b,c,d){if(c==null&&a.is(b,"object")&&"r"in b&&"g"in b&&"b"in b){d=b.b;c=b.g;b=b.r}if(c==null&&a.is(b,F)){var e=a.getRGB(b);b=e.r;c=e.g;d=e.b}if(b>1||c>1||d>1){b/=255;c/=255;d/=255}var f=z(b,c,d),g=A(b,c,d),h,i,j=f;{if(g==f)return{h:0,s:0,b:f,toString:bj};var k=f-g;i=k/f;b==f?h=(c-d)/k:c==f?h=2+(d-b)/k:h=4+(b-c)/k;h/=6;h<0&&h++;h>1&&h--}return{h:h,s:i,b:j,toString:bj}};a.rgb2hsl=function(b,c,d){if(c==null&&a.is(b,"object")&&"r"in b&&"g"in b&&"b"in b){d=b.b;c=b.g;b=b.r}if(c==null&&a.is(b,F)){var e=a.getRGB(b);b=e.r;c=e.g;d=e.b}if(b>1||c>1||d>1){b/=255;c/=255;d/=255}var f=z(b,c,d),g=A(b,c,d),h,i,j=(f+g)/2,k;if(g==f)k={h:0,s:0,l:j};else{var l=f-g;i=j<0.5?l/(f+g):l/(2-f-g);b==f?h=(c-d)/l:c==f?h=2+(d-b)/l:h=4+(b-c)/l;h/=6;h<0&&h++;h>1&&h--;k={h:h,s:i,l:j}}k.toString=bk;return k};a._path2string=function(){return this.join(",")[Y](ba,"$1")};function bm(a,b,c){function d(){var g=Array[e].slice.call(arguments,0),h=g[v]("►"),i=d.cache=d.cache||{},j=d.count=d.count||[];if(i[f](h))return c?c(i[h]):i[h];j[w]>=1000&&delete i[j.shift()];j[L](h);i[h]=a[m](b,g);return c?c(i[h]):i[h]}return d}a.getRGB=bm(function(b){if(!b||!(!((b=r(b)).indexOf("-")+1)))return{r:-1,g:-1,b:-1,hex:"none",error:1};if(b=="none")return{r:-1,g:-1,b:-1,hex:"none"};!(_[f](b.toLowerCase().substring(0,2))||b.charAt()=="#")&&(b=bi(b));var c,d,e,g,h,i,j,k=b.match(N);if(k){if(k[2]){g=T(k[2].substring(5),16);e=T(k[2].substring(3,5),16);d=T(k[2].substring(1,3),16)}if(k[3]){g=T((i=k[3].charAt(3))+i,16);e=T((i=k[3].charAt(2))+i,16);d=T((i=k[3].charAt(1))+i,16)}if(k[4]){j=k[4][s]($);d=S(j[0]);j[0].slice(-1)=="%"&&(d*=2.55);e=S(j[1]);j[1].slice(-1)=="%"&&(e*=2.55);g=S(j[2]);j[2].slice(-1)=="%"&&(g*=2.55);k[1].toLowerCase().slice(0,4)=="rgba"&&(h=S(j[3]));j[3]&&j[3].slice(-1)=="%"&&(h/=100)}if(k[5]){j=k[5][s]($);d=S(j[0]);j[0].slice(-1)=="%"&&(d*=2.55);e=S(j[1]);j[1].slice(-1)=="%"&&(e*=2.55);g=S(j[2]);j[2].slice(-1)=="%"&&(g*=2.55);(j[0].slice(-3)=="deg"||j[0].slice(-1)=="°")&&(d/=360);k[1].toLowerCase().slice(0,4)=="hsba"&&(h=S(j[3]));j[3]&&j[3].slice(-1)=="%"&&(h/=100);return a.hsb2rgb(d,e,g,h)}if(k[6]){j=k[6][s]($);d=S(j[0]);j[0].slice(-1)=="%"&&(d*=2.55);e=S(j[1]);j[1].slice(-1)=="%"&&(e*=2.55);g=S(j[2]);j[2].slice(-1)=="%"&&(g*=2.55);(j[0].slice(-3)=="deg"||j[0].slice(-1)=="°")&&(d/=360);k[1].toLowerCase().slice(0,4)=="hsla"&&(h=S(j[3]));j[3]&&j[3].slice(-1)=="%"&&(h/=100);return a.hsl2rgb(d,e,g,h)}k={r:d,g:e,b:g};k.hex="#"+(16777216|g|e<<8|d<<16).toString(16).slice(1);a.is(h,"finite")&&(k.opacity=h);return k}return{r:-1,g:-1,b:-1,hex:"none",error:1}},a);a.getColor=function(a){var b=this.getColor.start=this.getColor.start||{h:0,s:1,b:a||0.75},c=this.hsb2rgb(b.h,b.s,b.b);b.h+=0.075;if(b.h>1){b.h=0;b.s-=0.2;b.s<=0&&(this.getColor.start={h:0,s:1,b:b.b})}return c.hex};a.getColor.reset=function(){delete this.start};a.parsePathString=bm(function(b){if(!b)return null;var c={a:7,c:6,h:1,l:2,m:2,q:4,s:4,t:2,v:1,z:0},d=[];a.is(b,G)&&a.is(b[0],G)&&(d=bo(b));d[w]||r(b)[Y](bb,function(a,b,e){var f=[],g=x.call(b);e[Y](bc,function(a,b){b&&f[L](+b)});if(g=="m"&&f[w]>2){d[L]([b][n](f.splice(0,2)));g="l";b=b=="m"?"l":"L"}while(f[w]>=c[g]){d[L]([b][n](f.splice(0,c[g])));if(!c[g])break}});d[H]=a._path2string;return d});a.findDotsAtSegment=function(a,b,c,d,e,f,g,h,i){var j=1-i,k=C(j,3)*a+C(j,2)*3*i*c+j*3*i*i*e+C(i,3)*g,l=C(j,3)*b+C(j,2)*3*i*d+j*3*i*i*f+C(i,3)*h,m=a+2*i*(c-a)+i*i*(e-2*c+a),n=b+2*i*(d-b)+i*i*(f-2*d+b),o=c+2*i*(e-c)+i*i*(g-2*e+c),p=d+2*i*(f-d)+i*i*(h-2*f+d),q=(1-i)*a+i*c,r=(1-i)*b+i*d,s=(1-i)*e+i*g,t=(1-i)*f+i*h,u=90-y.atan((m-o)/(n-p))*180/D;(m>o||n1){x=y.sqrt(x);c=x*c;d=x*d}var z=c*c,A=d*d,C=(f==g?-1:1)*y.sqrt(B((z*A-z*u*u-A*t*t)/(z*u*u+A*t*t))),E=C*c*u/d+(a+h)/2,F=C*-d*t/c+(b+i)/2,G=y.asin(((b-F)/d).toFixed(9)),H=y.asin(((i-F)/d).toFixed(9));G=aH&&(G=G-D*2);!g&&H>G&&(H=H-D*2)}var I=H-G;if(B(I)>k){var J=H,K=h,L=i;H=G+k*(g&&H>G?1:-1);h=E+c*y.cos(H);i=F+d*y.sin(H);m=bt(h,i,c,d,e,0,g,K,L,[H,J,E,F])}I=H-G;var M=y.cos(G),N=y.sin(G),O=y.cos(H),P=y.sin(H),Q=y.tan(I/4),R=4/3*c*Q,S=4/3*d*Q,T=[a,b],U=[a+R*N,b-S*M],V=[h+R*P,i-S*O],W=[h,i];U[0]=2*T[0]-U[0];U[1]=2*T[1]-U[1];{if(j)return[U,V,W][n](m);m=[U,V,W][n](m)[v]()[s](",");var X=[];for(var Y=0,Z=m[w];Y"1e12"&&(l=0.5);B(n)>"1e12"&&(n=0.5);if(l>0&&l<1){q=bu(a,b,c,d,e,f,g,h,l);p[L](q.x);o[L](q.y)}if(n>0&&n<1){q=bu(a,b,c,d,e,f,g,h,n);p[L](q.x);o[L](q.y)}i=f-2*d+b-(h-2*f+d);j=2*(d-b)-2*(f-d);k=b-d;l=(-j+y.sqrt(j*j-4*i*k))/2/i;n=(-j-y.sqrt(j*j-4*i*k))/2/i;B(l)>"1e12"&&(l=0.5);B(n)>"1e12"&&(n=0.5);if(l>0&&l<1){q=bu(a,b,c,d,e,f,g,h,l);p[L](q.x);o[L](q.y)}if(n>0&&n<1){q=bu(a,b,c,d,e,f,g,h,n);p[L](q.x);o[L](q.y)}return{min:{x:A[m](0,p),y:A[m](0,o)},max:{x:z[m](0,p),y:z[m](0,o)}}}),bw=bm(function(a,b){var c=bq(a),d=b&&bq(b),e={x:0,y:0,bx:0,by:0,X:0,Y:0,qx:null,qy:null},f={x:0,y:0,bx:0,by:0,X:0,Y:0,qx:null,qy:null},g=function(a,b){var c,d;if(!a)return["C",b.x,b.y,b.x,b.y,b.x,b.y];!(a[0]in{T:1,Q:1})&&(b.qx=b.qy=null);switch(a[0]){case"M":b.X=a[1];b.Y=a[2];break;case"A":a=["C"][n](bt[m](0,[b.x,b.y][n](a.slice(1))));break;case"S":c=b.x+(b.x-(b.bx||b.x));d=b.y+(b.y-(b.by||b.y));a=["C",c,d][n](a.slice(1));break;case"T":b.qx=b.x+(b.x-(b.qx||b.x));b.qy=b.y+(b.y-(b.qy||b.y));a=["C"][n](bs(b.x,b.y,b.qx,b.qy,a[1],a[2]));break;case"Q":b.qx=a[1];b.qy=a[2];a=["C"][n](bs(b.x,b.y,a[1],a[2],a[3],a[4]));break;case"L":a=["C"][n](br(b.x,b.y,a[1],a[2]));break;case"H":a=["C"][n](br(b.x,b.y,a[1],b.y));break;case"V":a=["C"][n](br(b.x,b.y,b.x,a[1]));break;case"Z":a=["C"][n](br(b.x,b.y,b.X,b.Y));break}return a},h=function(a,b){if(a[b][w]>7){a[b].shift();var e=a[b];while(e[w])a.splice(b++,0,["C"][n](e.splice(0,6)));a.splice(b,1);k=z(c[w],d&&d[w]||0)}},i=function(a,b,e,f,g){if(a&&b&&a[g][0]=="M"&&b[g][0]!="M"){b.splice(g,0,["M",f.x,f.y]);e.bx=0;e.by=0;e.x=a[g][1];e.y=a[g][2];k=z(c[w],d&&d[w]||0)}};for(var j=0,k=z(c[w],d&&d[w]||0);j0.5)*2-1;C(e-0.5,2)+C(f-0.5,2)>0.25&&(f=y.sqrt(0.25-C(e-0.5,2))*g+0.5)&&f!=0.5&&(f=f.toFixed(5)-0.00001*g)}return p});b=b[s](/\s*\-\s*/);if(d=="linear"){var i=b.shift();i=-S(i);if(isNaN(i))return null;var j=[0,0,y.cos(i*D/180),y.sin(i*D/180)],k=1/(z(B(j[2]),B(j[3]))||1);j[2]*=k;j[3]*=k;if(j[2]<0){j[0]=-j[2];j[2]=0}if(j[3]<0){j[1]=-j[3];j[3]=0}}var m=bx(b);if(!m)return null;var n=a.getAttribute(I);n=n.match(/^url\(#(.*)\)$/);n&&c.defs.removeChild(g.getElementById(n[1]));var o=bG(d+"Gradient");o.id=bh();bG(o,d=="radial"?{fx:e,fy:f}:{x1:j[0],y1:j[1],x2:j[2],y2:j[3]});c.defs[l](o);for(var q=0,t=m[w];q1?G.opacity/100:G.opacity});case"stroke":G=a.getRGB(o);h[R](n,G.hex);n=="stroke"&&G[f]("opacity")&&bG(h,{"stroke-opacity":G.opacity>1?G.opacity/100:G.opacity});break;case"gradient":(({circle:1,ellipse:1})[f](c.type)||r(o).charAt()!="r")&&bI(h,o,c.paper);break;case"opacity":i.gradient&&!i[f]("stroke-opacity")&&bG(h,{"stroke-opacity":o>1?o/100:o});case"fill-opacity":if(i.gradient){var H=g.getElementById(h.getAttribute(I)[Y](/^url\(#|\)$/g,p));if(H){var J=H.getElementsByTagName("stop");J[J[w]-1][R]("stop-opacity",o)}break}default:n=="font-size"&&(o=T(o,10)+"px");var K=n[Y](/(\-.)/g,function(a){return V.call(a.substring(1))});h.style[K]=o;h[R](n,o);break}}}bM(c,d);m?c.rotate(m.join(q)):S(j)&&c.rotate(j,true)},bL=1.2,bM=function(b,c){if(b.type!="text"||!(c[f]("text")||c[f]("font")||c[f]("font-size")||c[f]("x")||c[f]("y")))return;var d=b.attrs,e=b.node,h=e.firstChild?T(g.defaultView.getComputedStyle(e.firstChild,p).getPropertyValue("font-size"),10):10;if(c[f]("text")){d.text=c.text;while(e.firstChild)e.removeChild(e.firstChild);var i=r(c.text)[s]("\n");for(var j=0,k=i[w];jb.height&&(b.height=e.y+e.height-b.y);e.x+e.width-b.x>b.width&&(b.width=e.x+e.width-b.x)}}a&&this.hide();return b};bN[e].attr=function(b,c){if(this.removed)return this;if(b==null){var d={};for(var e in this.attrs)this.attrs[f](e)&&(d[e]=this.attrs[e]);this._.rt.deg&&(d.rotation=this.rotate());(this._.sx!=1||this._.sy!=1)&&(d.scale=this.scale());d.gradient&&d.fill=="none"&&(d.fill=d.gradient)&&delete d.gradient;return d}if(c==null&&a.is(b,F)){if(b=="translation")return cz.call(this);if(b=="rotation")return this.rotate();if(b=="scale")return this.scale();if(b==I&&this.attrs.fill=="none"&&this.attrs.gradient)return this.attrs.gradient;return this.attrs[b]}if(c==null&&a.is(b,G)){var g={};for(var h=0,i=b.length;h"));m.W=h.w=m.paper.span.offsetWidth;m.H=h.h=m.paper.span.offsetHeight;m.X=h.x;m.Y=h.y+Q(m.H/2);switch(h["text-anchor"]){case"start":m.node.style["v-text-align"]="left";m.bbx=Q(m.W/2);break;case"end":m.node.style["v-text-align"]="right";m.bbx=-Q(m.W/2);break;default:m.node.style["v-text-align"]="center";break}}};bI=function(a,b){a.attrs=a.attrs||{};var c=a.attrs,d,e="linear",f=".5 .5";a.attrs.gradient=b;b=r(b)[Y](bd,function(a,b,c){e="radial";if(b&&c){b=S(b);c=S(c);C(b-0.5,2)+C(c-0.5,2)>0.25&&(c=y.sqrt(0.25-C(b-0.5,2))*((c>0.5)*2-1)+0.5);f=b+q+c}return p});b=b[s](/\s*\-\s*/);if(e=="linear"){var g=b.shift();g=-S(g);if(isNaN(g))return null}var h=bx(b);if(!h)return null;a=a.shape||a.node;d=a.getElementsByTagName(I)[0]||cd(I);!d.parentNode&&a.appendChild(d);if(h[w]){d.on=true;d.method="none";d.color=h[0].color;d.color2=h[h[w]-1].color;var i=[];for(var j=0,k=h[w];j")}}catch(a){cd=function(a){return g.createElement("<"+a+" xmlns=\"urn:schemas-microsoft.com:vml\" class=\"rvml\">")}}bV=function(){var b=by[m](0,arguments),c=b.container,d=b.height,e,f=b.width,h=b.x,i=b.y;if(!c)throw new Error("VML container not found.");var k=new j,n=k.canvas=g.createElement("div"),o=n.style;h=h||0;i=i||0;f=f||512;d=d||342;f==+f&&(f+="px");d==+d&&(d+="px");k.width=1000;k.height=1000;k.coordsize=b_*1000+q+b_*1000;k.coordorigin="0 0";k.span=g.createElement("span");k.span.style.cssText="position:absolute;left:-9999em;top:-9999em;padding:0;margin:0;line-height:1;display:inline;";n[l](k.span);o.cssText=a.format("top:0;left:0;width:{0};height:{1};display:inline-block;position:relative;clip:rect(0 {0} {1} 0);overflow:hidden",f,d);if(c==1){g.body[l](n);o.left=h+"px";o.top=i+"px";o.position="absolute"}else c.firstChild?c.insertBefore(n,c.firstChild):c[l](n);bz.call(k,k,a.fn);return k};k.clear=function(){this.canvas.innerHTML=p;this.span=g.createElement("span");this.span.style.cssText="position:absolute;left:-9999em;top:-9999em;padding:0;margin:0;line-height:1;display:inline;";this.canvas[l](this.span);this.bottom=this.top=null};k.remove=function(){this.canvas.parentNode.removeChild(this.canvas);for(var a in this)this[a]=bF(a);return true}}var ce=navigator.userAgent.match(/Version\\x2f(.*?)\s/);navigator.vendor=="Apple Computer, Inc."&&(ce&&ce[1]<4||navigator.platform.slice(0,2)=="iP")?k.safari=function(){var a=this.rect(-99,-99,this.width+99,this.height+99).attr({stroke:"none"});h.setTimeout(function(){a.remove()})}:k.safari=function(){};var cf=function(){this.returnValue=false},cg=function(){return this.originalEvent.preventDefault()},ch=function(){this.cancelBubble=true},ci=function(){return this.originalEvent.stopPropagation()},cj=(function(){{if(g.addEventListener)return function(a,b,c,d){var e=o&&u[b]?u[b]:b,g=function(e){if(o&&u[f](b))for(var g=0,h=e.targetTouches&&e.targetTouches.length;g1&&(a=Array[e].splice.call(arguments,0,arguments[w]));return new cC(a)};k.setSize=bU;k.top=k.bottom=null;k.raphael=a;function co(){return this.x+q+this.y}bO.resetScale=function(){if(this.removed)return this;this._.sx=1;this._.sy=1;this.attrs.scale="1 1"};bO.scale=function(a,b,c,d){if(this.removed)return this;if(a==null&&b==null)return{x:this._.sx,y:this._.sy,toString:co};b=b||a;!(+b)&&(b=a);var e,f,g,h,i=this.attrs;if(a!=0){var j=this.getBBox(),k=j.x+j.width/2,l=j.y+j.height/2,m=B(a/this._.sx),o=B(b/this._.sy);c=+c||c==0?c:k;d=+d||d==0?d:l;var r=this._.sx>0,s=this._.sy>0,t=~(~(a/B(a))),u=~(~(b/B(b))),x=m*t,y=o*u,z=this.node.style,A=c+B(k-c)*x*(k>c==r?1:-1),C=d+B(l-d)*y*(l>d==s?1:-1),D=a*t>b*u?o:m;switch(this.type){case"rect":case"image":var E=i.width*m,F=i.height*o;this.attr({height:F,r:i.r*D,width:E,x:A-E/2,y:C-F/2});break;case"circle":case"ellipse":this.attr({rx:i.rx*m,ry:i.ry*o,r:i.r*D,cx:A,cy:C});break;case"text":this.attr({x:A,y:C});break;case"path":var G=bp(i.path),H=true,I=r?x:m,J=s?y:o;for(var K=0,L=G[w];Kr)p=n.data[r*l];else{p=a.findDotsAtSegment(b,c,d,e,f,g,h,i,r/l);n.data[r]=p}r&&(k+=C(C(o.x-p.x,2)+C(o.y-p.y,2),0.5));if(j!=null&&k>=j)return p;o=p}if(j==null)return k},cr=function(b,c){return function(d,e,f){d=bw(d);var g,h,i,j,k="",l={},m,n=0;for(var o=0,p=d.length;oe){if(c&&!l.start){m=cq(g,h,i[1],i[2],i[3],i[4],i[5],i[6],e-n);k+=["C",m.start.x,m.start.y,m.m.x,m.m.y,m.x,m.y];if(f)return k;l.start=k;k=["M",m.x,m.y+"C",m.n.x,m.n.y,m.end.x,m.end.y,i[5],i[6]][v]();n+=j;g=+i[5];h=+i[6];continue}if(!b&&!c){m=cq(g,h,i[1],i[2],i[3],i[4],i[5],i[6],e-n);return{x:m.x,y:m.y,alpha:m.alpha}}}n+=j;g=+i[5];h=+i[6]}k+=i}l.end=k;m=b?n:c?l:a.findDotsAtSegment(g,h,i[1],i[2],i[3],i[4],i[5],i[6],1);m.alpha&&(m={x:m.x,y:m.y,alpha:m.alpha});return m}},cs=cr(1),ct=cr(),cu=cr(0,1);bO.getTotalLength=function(){if(this.type!="path")return;if(this.node.getTotalLength)return this.node.getTotalLength();return cs(this.attrs.path)};bO.getPointAtLength=function(a){if(this.type!="path")return;return ct(this.attrs.path,a)};bO.getSubpath=function(a,b){if(this.type!="path")return;if(B(this.getTotalLength()-b)<"1e-6")return cu(this.attrs.path,a).end;var c=cu(this.attrs.path,b,1);return a?cu(c,a).end:c};a.easing_formulas={linear:function(a){return a},"<":function(a){return C(a,3)},">":function(a){return C(a-1,3)+1},"<>":function(a){a=a*2;if(a<1)return C(a,3)/2;a-=2;return(C(a,3)+2)/2},backIn:function(a){var b=1.70158;return a*a*((b+1)*a-b)},backOut:function(a){a=a-1;var b=1.70158;return a*a*((b+1)*a+b)+1},elastic:function(a){if(a==0||a==1)return a;var b=0.3,c=b/4;return C(2,-10*a)*y.sin((a-c)*(2*D)/b)+1},bounce:function(a){var b=7.5625,c=2.75,d;if(a<1/c)d=b*a*a;else if(a<2/c){a-=1.5/c;d=b*a*a+0.75}else if(a<2.5/c){a-=2.25/c;d=b*a*a+0.9375}else{a-=2.625/c;d=b*a*a+0.984375}return d}};var cv=[],cw=function(){var b=+(new Date);for(var c=0;cd)return d;while(cf?c=e:d=e;e=(d-c)/2+c}return e}return n(a,1/(200*f))}bO.onAnimation=function(a){this._run=a||0;return this};bO.animate=function(c,d,e,g){var h=this;h.timeouts=h.timeouts||[];if(a.is(e,"function")||!e)g=e||null;if(h.removed){g&&g.call(h);return h}var i={},j={},k=false,l={};for(var m in c)if(c[f](m)){if(X[f](m)||h.paper.customAttributes[f](m)){k=true;i[m]=h.attr(m);i[m]==null&&(i[m]=W[m]);j[m]=c[m];switch(X[m]){case"along":var n=cs(c[m]),o=ct(c[m],n*!(!c.back)),p=h.getBBox();l[m]=n/d;l.tx=p.x;l.ty=p.y;l.sx=o.x;l.sy=o.y;j.rot=c.rot;j.back=c.back;j.len=n;c.rot&&(l.r=S(h.rotate())||0);break;case E:l[m]=(j[m]-i[m])/d;break;case"colour":i[m]=a.getRGB(i[m]);var q=a.getRGB(j[m]);l[m]={r:(q.r-i[m].r)/d,g:(q.g-i[m].g)/d,b:(q.b-i[m].b)/d};break;case"path":var t=bw(i[m],j[m]);i[m]=t[0];var u=t[1];l[m]=[];for(var v=0,x=i[m][w];v