├── .gitignore ├── .idea ├── .name ├── ClojureProjectResolveSettings.xml ├── codeStyles │ └── codeStyleConfig.xml ├── compiler.xml ├── encodings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── Dockerfile ├── README.md ├── deploy.sh ├── dev └── src │ ├── figwheel_repl.clj │ └── user.clj ├── project.clj ├── resources ├── config.edn └── public │ ├── android-chrome-192x192.png │ ├── android-chrome-512x512.png │ ├── apple-touch-icon.png │ ├── browserconfig.xml │ ├── css │ └── main.css │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon.ico │ ├── favicon_generator.md │ ├── html_code.html │ ├── img │ ├── favicon.png │ ├── favicon.svg │ ├── logo.svg │ ├── logo_dark.svg │ ├── logo_dark_min.svg │ ├── logo_min.svg │ └── search.svg │ ├── index.html │ ├── mstile-150x150.png │ ├── safari-pinned-tab.svg │ └── site.webmanifest ├── src └── sinostudy │ ├── cofx.cljs │ ├── config.cljs │ ├── core.cljs │ ├── db.cljs │ ├── dictionary │ ├── core.cljc │ ├── data.cljc │ ├── embed.cljc │ └── load.clj │ ├── events │ ├── actions.cljs │ ├── core.cljs │ └── scrolling.cljs │ ├── fx.cljs │ ├── macros │ └── core.clj │ ├── navigation │ ├── handler.clj │ ├── pages.cljc │ └── routes.cljs │ ├── pinyin │ ├── core.cljc │ ├── data.cljc │ ├── eval.cljc │ └── patterns.cljc │ ├── rim │ └── core.cljc │ ├── spec │ ├── dictionary.cljc │ └── pages.cljc │ ├── subs.cljs │ └── views │ ├── common.cljs │ ├── core.cljs │ └── dictionary.cljs └── test └── sinostudy └── pinyin └── core_test.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /*.log 2 | /target 3 | /*-init.clj 4 | /resources/public/js/compiled 5 | out 6 | *.iml 7 | resources/tatoeba/links.csv 8 | resources/tatoeba/sentences_detailed.csv 9 | resources/tatoeba/tags.csv 10 | resources/tatoeba/users_sentences.csv 11 | resources/makemeahanzi/ 12 | resources/Unihan 13 | resources/frequency/ 14 | 15 | # Created by https://www.gitignore.io/api/macos,clojure,intellij,leiningen 16 | 17 | ### Clojure ### 
18 | pom.xml 19 | pom.xml.asc 20 | *.jar 21 | *.class 22 | /lib/ 23 | /classes/ 24 | /target/ 25 | /checkouts/ 26 | .lein-deps-sum 27 | .lein-repl-history 28 | .lein-plugins/ 29 | .lein-failures 30 | .nrepl-port 31 | 32 | ### Intellij ### 33 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 34 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 35 | 36 | # User-specific stuff: 37 | .idea/**/workspace.xml 38 | .idea/**/tasks.xml 39 | .idea/dictionaries 40 | 41 | # Sensitive or high-churn files: 42 | .idea/**/dataSources/ 43 | .idea/**/dataSources.ids 44 | .idea/**/dataSources.xml 45 | .idea/**/dataSources.local.xml 46 | .idea/**/sqlDataSources.xml 47 | .idea/**/dynamic.xml 48 | .idea/**/uiDesigner.xml 49 | 50 | # Gradle: 51 | .idea/**/gradle.xml 52 | .idea/**/libraries 53 | 54 | # CMake 55 | cmake-build-debug/ 56 | 57 | # Mongo Explorer plugin: 58 | .idea/**/mongoSettings.xml 59 | 60 | ## File-based project format: 61 | *.iws 62 | 63 | ## Plugin-specific files: 64 | 65 | # IntelliJ 66 | /out/ 67 | 68 | # mpeltonen/sbt-idea plugin 69 | .idea_modules/ 70 | 71 | # JIRA plugin 72 | atlassian-ide-plugin.xml 73 | 74 | # Cursive Clojure plugin 75 | .idea/replstate.xml 76 | 77 | # Ruby plugin and RubyMine 78 | /.rakeTasks 79 | 80 | # Crashlytics plugin (for Android Studio and IntelliJ) 81 | com_crashlytics_export_strings.xml 82 | crashlytics.properties 83 | crashlytics-build.properties 84 | fabric.properties 85 | 86 | ### Intellij Patch ### 87 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 88 | 89 | # *.iml 90 | # modules.xml 91 | # .idea/misc.xml 92 | # *.ipr 93 | 94 | # Sonarlint plugin 95 | .idea/sonarlint 96 | 97 | ### Leiningen ### 98 | 99 | ### macOS ### 100 | *.DS_Store 101 | .AppleDouble 102 | .LSOverride 103 | 104 | # Icon must end with two \r 105 | Icon 106 | 107 | # Thumbnails 108 | ._* 109 | 110 | # Files that might appear in 
the root of a volume 111 | .DocumentRevisions-V100 112 | .fseventsd 113 | .Spotlight-V100 114 | .TemporaryItems 115 | .Trashes 116 | .VolumeIcon.icns 117 | .com.apple.timemachine.donotpresent 118 | 119 | # Directories potentially created on remote AFP share 120 | .AppleDB 121 | .AppleDesktop 122 | Network Trash Folder 123 | Temporary Items 124 | .apdisk 125 | 126 | # End of https://www.gitignore.io/api/macos,clojure,intellij,leiningen 127 | 128 | .idea/codeStyleSettings.xml 129 | cedict_ts.u8 130 | resources/version.edn 131 | .rebel_readline_history 132 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | sino.study -------------------------------------------------------------------------------- /.idea/ClojureProjectResolveSettings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | IDE 5 | 6 | -------------------------------------------------------------------------------- /.idea/codeStyles/codeStyleConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.idea/modules.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:10 2 | ARG JARPATH 3 | ARG JARFILE 4 | ENV JARFILE "$JARFILE" 5 | MAINTAINER Simon Gray 6 | ADD "$JARPATH" /usr/src/myapp/ 7 | WORKDIR /usr/src/myapp 8 | EXPOSE 8080 9 | CMD java -XX:+PrintFlagsFinal -jar "$JARFILE" 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![sino·study](./resources/public/img/logo_min.svg) 2 | 3 | This is the repository for [sino·study](http://sino.study), 4 | a web app designed to assist students of the Chinese language in various ways. 5 | At the moment, it is primarily an advanced dictionary, 6 | but in the future it will also include functionality for grammatical analysis. 7 | 8 | It is a single-page application written in [Clojure](https://clojure.org/) 9 | and [ClojureScript](https://clojurescript.org/). 10 | The frontend uses [Reagent](https://github.com/reagent-project/reagent) 11 | and [re-frame](https://github.com/Day8/re-frame). 12 | Furthermore, it makes use of [secretary](https://github.com/gf3/secretary) 13 | and [Accountant](https://github.com/venantius/accountant) for frontend routing. 14 | The backend is a [Compojure](https://github.com/weavejester/compojure) service 15 | that is served by [http-kit](https://github.com/http-kit/http-kit). 16 | Communication between the backend web service and the frontend app is 17 | facilitated by [Transit](https://github.com/cognitect/transit-format). 
18 | The functionality is built around my own wrapper library for Stanford CoreNLP, 19 | [Computerese](https://github.com/simongray/Computerese), as well as numerous 20 | open-source datasets, most notably [CC-CEDICT](https://cc-cedict.org/) and 21 | [makemeahanzi](https://github.com/skishore/makemeahanzi). 22 | 23 | 24 | # Development 25 | * Running the sino.study app requires the sinostudy-data git repository to be 26 | located at ~/Code/sinostudy-data. Make sure that directory exists and pull from: 27 | [sino.study-data](https://github.com/simongray/sino.study-data). 28 | **Note: this applies to both dev and production environments.** 29 | 30 | * The REPL starts out in the `user` ns with various other namespaces required. 31 | The user ns also includes relevant custom functions for development. 32 | Changes to e.g. dictionary data structures and most other backend development 33 | are best tested in the REPL. 34 | 35 | 36 | ## Developing with live re-loading 37 | Typical development involves running a development web service locally, 38 | while accessing the data from the service through a live-reloading frontend app. 39 | 40 | ### Local backend server 41 | Start a lein nREPL for the project, then evaluate the following: 42 | 43 | ```` 44 | (start) 45 | ```` 46 | 47 | This will load the dictionary and start a production server using http-kit. 48 | Wait a bit, then browse to [http://localhost:8080](http://localhost:8080). 49 | 50 | The default port is `8080`, but it can be configured in `resources/config.edn`. 51 | The system can be stopped again by evaluating: 52 | 53 | ``` 54 | (stop) 55 | ``` 56 | 57 | Pieces of state can be reloaded by evaluating e.g. ```(restart #'dict)```. 58 | 59 | ### Run live-reloading frontend app 60 | For frontend development, the figwheel experience is currently [integrated with 61 | Cursive using `figwheel-sidecar`](https://github.com/bhauman/lein-figwheel/wiki/Running-figwheel-in-a-Cursive-Clojure-REPL). 
62 | An IntelliJ REPL should be configured using the `Use clojure.main in normal JVM 63 | process` option and with `dev/src/figwheel_repl.clj` set as the parameter. 64 | 65 | As an alternative, it is also possible to simply run ```lein figwheel dev```. 66 | 67 | Launch the REPL once configured, wait a bit, then browse to 68 | [http://localhost:3449](http://localhost:3449). 69 | 70 | Figwheel will automatically push CLJS changes to the browser, 71 | while preserving the application state. A hard page reload will reset the state. 72 | 73 | If there are any issues getting the app to show up (e.g. blank page), 74 | then try clearing the browser cache. Note that most functionality will require 75 | the development backend service to be running too. 76 | 77 | ### Running dev app on a mobile phone 78 | If I want to test on a mobile phone I will typically run 79 | 80 | ```` 81 | ipconfig getifaddr en0 82 | ```` 83 | 84 | to get the local IP address of my Mac and then visit that address on port 3449 85 | (or whatever port is being used). 86 | 87 | 88 | ## Deploying to production 89 | Currently, there are three steps to deploying a production Docker image: 90 | 91 | 1. compiling an uberjar 92 | 2. building the docker image 93 | 3. running a container from the image in production 94 | 95 | ### Compiling an uberjar for rapid deployment 96 | This will create a standalone JAR file including the entire compiled app 97 | (note: target JAR filename subject to change). 98 | 99 | ```` 100 | lein uberjar 101 | ```` 102 | 103 | The uberjar is a self-contained backend+frontend, although it does expect 104 | the sino.study-data repo to be present at the correct path! 105 | To test that the uberjar was packaged correctly, run: 106 | 107 | ```` 108 | java -jar target/sinostudy-standalone.jar 109 | ```` 110 | 111 | (the JAR filename is set by `:uberjar-name` in `project.clj` and contains no version number) 112 | 113 | Wait a bit, then browse to [http://localhost:8080](http://localhost:8080). 
114 | 115 | 116 | ### Building and deploying docker image 117 | 118 | To build an image from the Dockerfile, run: 119 | 120 | ```` 121 | docker build -t simongray/sino.study:latest -t simongray/sino.study:${version} --build-arg JARPATH=${jarpath} --build-arg JARFILE=${jarfile} . 122 | ```` 123 | 124 | Note: this requires the uberjar built during the previous step as well as the 125 | correct name and path of the jarfile. 126 | 127 | It can then be pushed and pulled from the docker store by running e.g. 128 | 129 | ```` 130 | docker push simongray/sino.study 131 | docker pull simongray/sino.study 132 | ```` 133 | 134 | The image can be run as a Docker container using: 135 | 136 | ```` 137 | # in production 138 | docker run -v /root/Code/sinostudy-data:/root/Code/sinostudy-data -p 80:8080 simongray/sino.study:latest 139 | 140 | # testing locally 141 | docker run -v /Users/simon/Code/sinostudy-data:/root/Code/sinostudy-data -p 80:8080 simongray/sino.study:latest 142 | ```` 143 | 144 | (this will tunnel the exposed `8080` port of the docker container 145 | to the production system's port `80`) 146 | 147 | Wait a little while, then visit [http://localhost:80](http://localhost:80) 148 | or [http://sino.study](http://sino.study). 149 | 150 | Use ````docker ps -a```` to list all containers and their assigned names. 151 | Stop and remove containers using other relevant docker commands. 152 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script automates the following: 4 | # * compiles an uberjar 5 | # * builds and tags a docker image containing the uberjar 6 | # * pushes the docker image to the docker store 7 | 8 | re=":tag \"v([^\"]+)" 9 | 10 | # Needs to run a lein action at least once to make sure version.edn is built. 
11 | lein version 12 | 13 | if [[ $(cat resources/version.edn) =~ $re ]]; then 14 | version=${BASH_REMATCH[1]} 15 | jarfile="sinostudy-standalone.jar" 16 | jarpath="target/sinostudy-standalone.jar" 17 | echo "version: ${version}"; 18 | 19 | echo "removing old build artifacts" 20 | lein clean 21 | 22 | echo "building uberjar: ${jarfile}" 23 | lein uberjar 24 | 25 | echo "building docker image" 26 | docker build -t simongray/sino.study:latest -t simongray/sino.study:${version} --build-arg JARPATH=${jarpath} --build-arg JARFILE=${jarfile} . 27 | 28 | echo "pushing docker image" 29 | docker push simongray/sino.study 30 | else 31 | echo "ERROR: could not determine current version" 32 | fi 33 | -------------------------------------------------------------------------------- /dev/src/figwheel_repl.clj: -------------------------------------------------------------------------------- 1 | ;;; See: https://github.com/bhauman/lein-figwheel/wiki/Running-figwheel-in-a-Cursive-Clojure-REPL 2 | (use 'figwheel-sidecar.repl-api) 3 | (start-figwheel!) ;; <-- fetches configuration 4 | (cljs-repl) -------------------------------------------------------------------------------- /dev/src/user.clj: -------------------------------------------------------------------------------- 1 | (ns user 2 | (:require [clojure.java.io :as io] 3 | [clojure.spec.alpha :as s] 4 | [clojure.spec.gen.alpha :as gen] 5 | [mount.core :as mount :refer [start stop]] 6 | [mount-up.core :as mount-up] 7 | [sinostudy.spec.dictionary :as sd] 8 | [sinostudy.dictionary.core :as d] 9 | [sinostudy.dictionary.load :as load] 10 | [sinostudy.navigation.handler :as handler :refer [dict config server]] 11 | [sinostudy.pinyin.core :as p])) 12 | 13 | (mount-up/on-upndown :info mount-up/log :before) 14 | 15 | (defn restart 16 | "Restart one or more pieces of mount state." 
17 | [& states] 18 | (apply stop states) 19 | (apply start states)) 20 | 21 | (defn look-up* 22 | "A version of look-up that performs both the backend and frontend processing. 23 | Useful for testing what the search results on the frontend look like." 24 | [term] 25 | (->> term 26 | (d/look-up dict) 27 | (d/reduce-result) 28 | (d/sort-result))) 29 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject sinostudy "_" 2 | :description "The sino.study project." 3 | :url "http://sino.study" 4 | :min-lein-version "2.8.1" 5 | :source-paths ["src"] 6 | :resource-paths ["resources"] 7 | :jar-name "sinostudy.jar" 8 | :uberjar-name "sinostudy-standalone.jar" 9 | 10 | :dependencies [[org.clojure/clojure "1.10.1"] 11 | [org.clojure/clojurescript "1.10.520"] 12 | [org.clojure/data.csv "0.1.4"] 13 | [org.clojure/test.check "0.10.0"] 14 | [computerese "0.1.0-SNAPSHOT"] 15 | [mount "0.1.16"] 16 | [tolitius/mount-up "0.1.2"] 17 | [reagent "0.8.1"] 18 | [re-frame "0.10.8"] 19 | [day8.re-frame/http-fx "0.1.6"] 20 | [clj-commons/secretary "1.2.4"] 21 | [venantius/accountant "0.2.4"] 22 | [com.cognitect/transit-clj "0.8.313"] 23 | [com.cognitect/transit-cljs "0.8.256"] 24 | [compojure "1.6.1"] 25 | [http-kit "2.3.0"] 26 | [ring/ring-defaults "0.3.2"] 27 | [clj-json "0.5.3"]] 28 | 29 | :plugins [[me.arrdem/lein-git-version "2.0.8"] 30 | [lein-cljsbuild "1.1.7"]] 31 | 32 | :git-version {:version-file "resources/version.edn" 33 | :version-file-keys [:tag ; Name of the last git tag if any 34 | :ahead ; Number of commits ahead of the last tag, or 0 35 | :ahead? ; Is the head ahead by more than 0 commits 36 | :ref ; The full current ref 37 | :ref-short ; The "short" current ref 38 | :branch ; The name of the current branch 39 | :dirty? ; Optional. Boolean. Are there un-committed changes. 40 | :message ; Optional. The last commit message when clean. 
41 | :timestamp]} ; Optional. The last commit date when clean.]} 42 | 43 | :profiles {:dev {:dependencies [[binaryage/devtools "0.9.10"] 44 | [day8.re-frame/re-frame-10x "0.4.2"] 45 | [figwheel-sidecar "0.5.19"]] ; for Cursive-integrated figwheel REPL 46 | :plugins [[lein-figwheel "0.5.19"]] ; for running `lein figwheel dev` 47 | :source-paths ["dev/src"] 48 | :repl-options {:init-ns user}} 49 | 50 | :uberjar {:main sinostudy.navigation.handler ; must match the ns of src/sinostudy/navigation/handler.clj 51 | :aot [sinostudy.navigation.handler] 52 | :prep-tasks ["clean" 53 | "compile" 54 | ["cljsbuild" "once" "min"]]}} 55 | 56 | :clean-targets ^{:protect false} ["resources/public/js/compiled" "target"] 57 | :figwheel {:css-dirs ["resources/public/css"]} 58 | :cljsbuild {:builds [{:id "dev" 59 | :source-paths ["src"] 60 | :figwheel {:on-jsload "sinostudy.core/mount-root"} 61 | :compiler {:main sinostudy.core 62 | :output-to "resources/public/js/compiled/app.js" 63 | :output-dir "resources/public/js/compiled/out" 64 | :asset-path "js/compiled/out" 65 | :source-map-timestamp true 66 | :optimizations :none 67 | :closure-defines {"re_frame.trace.trace_enabled_QMARK_" true} 68 | :preloads [devtools.preload 69 | day8.re-frame-10x.preload] 70 | :external-config {:devtools/config {:features-to-install :all}}}} 71 | 72 | {:id "min" 73 | :source-paths ["src"] 74 | :compiler {:main sinostudy.core 75 | :output-to "resources/public/js/compiled/app.js" 76 | :optimizations :advanced 77 | :closure-defines {goog.DEBUG false} 78 | :pretty-print false}}]}) 79 | -------------------------------------------------------------------------------- /resources/config.edn: -------------------------------------------------------------------------------- 1 | {:server {:port {:internal 8080 2 | :external 80}} 3 | :evaluation {:delay 250}} -------------------------------------------------------------------------------- /resources/public/android-chrome-192x192.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/android-chrome-192x192.png -------------------------------------------------------------------------------- /resources/public/android-chrome-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/android-chrome-512x512.png -------------------------------------------------------------------------------- /resources/public/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/apple-touch-icon.png -------------------------------------------------------------------------------- /resources/public/browserconfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | #da532c 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /resources/public/css/main.css: -------------------------------------------------------------------------------- 1 | /* === GLOBAL === */ 2 | 3 | * { 4 | font-family: "Gill Sans", "Gill Sans MT", Calibri, "KaiTi", "楷体", STKaiti, "华文楷体", sans-serif; 5 | font-weight: 300; 6 | color: #555; 7 | padding: 0; 8 | margin: 0; 9 | hyphens: auto; 10 | 11 | /* otherwise safari fonts become too thin */ 12 | -webkit-font-smoothing: subpixel-antialiased; 13 | } 14 | 15 | /* Hanzi are made a bit darker than latin text to make them stand out */ 16 | :lang(zh) { 17 | letter-spacing: 0.4ch; 18 | color: #333; 19 | } 20 | 21 | :lang(en) { 22 | letter-spacing: 0.2ch; 23 | } 24 | 25 | /* https://www.sitepoint.com/understanding-and-using-rem-units-in-css/ */ 26 | html { 27 | font-size: 62.5%; /* = 10px (down from 16px) */ 28 | } 29 | 30 | html, body { 31 | 
height: 100%; 32 | } 33 | 34 | a { 35 | /* So that links won't suddenly reset a custom font to the global one. */ 36 | font-family: inherit; 37 | color: inherit; 38 | text-decoration: none; 39 | } 40 | 41 | a:hover { 42 | color: #4477DD; 43 | } 44 | 45 | /* headings and content inside them get the serif fonts */ 46 | h1, h2, h3, h1 *, h2 *, h3 * { 47 | color: #555555; 48 | font-family: Didot, "Didot LT STD", "Hoefler Text", Garamond, "Times New Roman", "KaiTi", "楷体", STKaiti, "华文楷体", serif; 49 | } 50 | 51 | /* basic document margin */ 52 | h1, h2, p, ol, ul, dl, dd, table { 53 | margin-top: 1rem; 54 | margin-bottom: 2rem; 55 | } 56 | 57 | h1 + p { 58 | margin-top: -1rem; 59 | } 60 | 61 | h1 { 62 | font-size: 2.8rem; 63 | } 64 | 65 | h2 { 66 | font-size: 2.4rem; 67 | } 68 | 69 | p { 70 | font-size: 1.8rem; 71 | } 72 | 73 | ol, ul { 74 | margin-left: 4rem; 75 | } 76 | 77 | section + section { 78 | margin-top: 1.5rem; 79 | } 80 | 81 | dt { 82 | font-size: 2.4rem; 83 | } 84 | 85 | li { 86 | font-size: 1.8rem; 87 | margin-bottom: 0.8rem; 88 | } 89 | 90 | table { 91 | font-size: 1.4rem; 92 | 93 | /* remove double border */ 94 | border-collapse: collapse; 95 | } 96 | 97 | td, th { 98 | padding: 1rem; 99 | } 100 | 101 | tr { 102 | border-left: 2px solid #DD8888; 103 | border-right: 2px solid #DD8888; 104 | } 105 | 106 | tr:first-child { 107 | border-top: 2px solid #DD8888; 108 | } 109 | 110 | tr:last-child { 111 | border-bottom: 2px solid #DD8888; 112 | } 113 | 114 | 115 | 116 | 117 | /* === GLOBAL CLASSES === */ 118 | .pinyin { 119 | color: #DD8888; 120 | font-family: Didot, "Didot LT STD", "Hoefler Text", Garamond, "Times New Roman", "KaiTi", "楷体", STKaiti, "华文楷体", serif; 121 | } 122 | 123 | .pinyin::before { 124 | content: "["; 125 | } 126 | 127 | .pinyin::after { 128 | content: "]"; 129 | } 130 | 131 | 132 | 133 | 134 | 135 | /* === ANIMATIONS === */ 136 | 137 | @keyframes fade-in { 138 | from { 139 | opacity: 0; 140 | } 141 | to { 142 | opacity: 1; 143 | } 144 | } 145 
| 146 | 147 | 148 | 149 | /* === MAIN & HEADER === */ 150 | 151 | 152 | div#app { 153 | height: 100%; /* also set for all parent elements */ 154 | 155 | display: flex; 156 | flex-direction: column; 157 | align-items: center; 158 | justify-content: space-between; 159 | 160 | background-color: #444; 161 | background-image: radial-gradient(ellipse at bottom, rgba(255,255,255,0.2) 0%, transparent 100%); 162 | box-shadow: 0 0 15rem 0 rgba(0, 0, 0, 0.3) inset; 163 | } 164 | 165 | /* would have styled BODY or HTML instead if React could access them */ 166 | main { 167 | /* take up all middle space and allow for scrolling */ 168 | height: 100%; 169 | width: calc(100% - 1rem); 170 | max-width: 65rem; 171 | overflow: auto; 172 | padding: 0.5rem; 173 | flex-grow: 1; 174 | } 175 | 176 | main#splash { 177 | display: flex; 178 | flex-direction: column; 179 | align-items: center; 180 | justify-content: center; 181 | } 182 | 183 | main#splash img { 184 | animation: fade-in 0.5s ease; 185 | width: calc(100% - 1rem); 186 | margin: 2rem auto; 187 | } 188 | 189 | main#splash blockquote { 190 | animation: fade-in 1s ease; 191 | font-size: 1.8rem; 192 | line-height: 1.5; 193 | color: #999; 194 | width: calc(100% - 6rem); 195 | border-left: 0.5rem solid #333; 196 | border-right: 0.5rem solid #333; 197 | border-radius: 1rem; 198 | padding: 0 2rem; 199 | text-align: justify; 200 | } 201 | 202 | /* prevent squashing on mobile when the soft-keyboard is open */ 203 | @media only screen and (max-height: 25rem) { 204 | main#splash blockquote { 205 | display: none; 206 | } 207 | } 208 | 209 | main#splash a { 210 | color: #DD9999; 211 | font-variant: small-caps; 212 | white-space: nowrap; 213 | } 214 | 215 | main#splash a:hover { 216 | text-decoration: underline; 217 | } 218 | 219 | header { 220 | animation: fade-in 1s ease; 221 | 222 | width: 100%; 223 | 224 | background: #BB5544; 225 | box-shadow: 0 0 1rem 0 rgba(0, 0, 0, 0.3); 226 | background-image: linear-gradient(to top, #BB5544, #CC6644); 
227 | 228 | /* make sure search results are covered by shadow */ 229 | z-index: 1; 230 | } 231 | 232 | /* TODO: do I need this? 233 | aligner is necessary for limiting width and centering horizontally! 234 | without "min-width:98%;" in .vcenter, .vcenter gets crammed, 235 | so this extra class is necessary to get a max-width. 236 | */ 237 | header div#aligner { 238 | max-width: 65rem; /* sync with main */ 239 | margin: auto; 240 | padding: 0.5rem; 241 | text-align: center; 242 | vertical-align: middle; 243 | 244 | /* makes logo img able to assume 0 height */ 245 | line-height: 0; 246 | } 247 | 248 | 249 | /* Also known as the #study-form */ 250 | div#header-input { 251 | display: flex; 252 | } 253 | 254 | /* Formerly known as the #study-input */ 255 | div#header-input > input { 256 | animation: fade-in 1s ease; 257 | transition: all 0.3s; 258 | 259 | font-size: 2.4rem; 260 | color: #CCC; 261 | 262 | /* fixes overflow in Chrome iPhone 5/SE device inspector */ 263 | max-width: 100%; 264 | box-sizing: border-box; 265 | 266 | padding: 0.5rem 3.5rem 0.5rem 0.5rem; 267 | border: none; 268 | border-radius: 0.3rem; 269 | background: #2A2A2A url("/img/search.svg"); 270 | background-repeat: no-repeat; 271 | background-size: auto calc(100% - 1.5rem); 272 | background-position: calc(100% - 0.75rem) 50%; 273 | box-shadow: inset 0 0 0.5rem 0 rgba(0, 0, 0, 1), 274 | 0 0 0.5rem 0 rgba(255, 255, 255, 0.3); 275 | /* Take up full width */ 276 | flex: 1; 277 | } 278 | 279 | div#header-input > input:focus { 280 | padding: 0.5rem; 281 | background-position: calc(100% + 3.5rem) 50%; 282 | } 283 | 284 | /* Grey colouring of input when action-chooser is active */ 285 | div#header-input > input[disabled], 286 | div#header-input > input::placeholder { 287 | color: #666; 288 | } 289 | 290 | /* Dotted underline on input carrying the `unknown` class */ 291 | div#header-input > input.unknown { 292 | text-decoration: underline; 293 | text-decoration-color: #884433; 294 | text-decoration-style: 
dotted; 295 | } 296 | 297 | /* Formerly known as the #study-button */ 298 | div#header-input > button { 299 | border: 0; 300 | padding: 0; 301 | font-size: 0; 302 | width: 0; 303 | } 304 | 305 | header p#title { 306 | color: #FFBBBB; 307 | font-size: 1.8rem; 308 | margin: 1.7rem 0 -0.5rem 0; 309 | animation: fade-in 1s ease; 310 | } 311 | 312 | header p#title em { 313 | color: #FFBBBB; 314 | font-weight: bold; /* was `font-style: bold`, which is not a valid value and was dropped by browsers */ 315 | } 316 | 317 | header p#title + div#filters { 318 | margin-top: 3.2rem; 319 | } 320 | 321 | header p#title + div#filters.hidden { 322 | margin-top: 1.8rem; 323 | } 324 | 325 | 326 | 327 | /* === VERSION NUMBER === */ 328 | address { 329 | animation: fade-in 3s ease; 330 | color: #333; 331 | font-size: 1.4rem; 332 | position: absolute; 333 | padding: 1rem; 334 | right: 0; 335 | bottom: 6rem; 336 | transition: all 0.5s; 337 | } 338 | 339 | address.hidden { 340 | opacity: 0; 341 | } 342 | 343 | @media only screen and (max-height: 15rem) { 344 | address { 345 | display: none; 346 | } 347 | } 348 | 349 | 350 | 351 | 352 | /* === FILTERS === */ 353 | div#filters { 354 | transition: all 0.2s; 355 | margin: 1.8rem 0 0.7rem 0; /* TODO: weird values here */ 356 | font-size: 1.6rem; 357 | color: #FFBBBB; /* for the separating dots */ 358 | letter-spacing: 0.2rem; 359 | word-spacing: 0.3rem; 360 | text-align: center; 361 | } 362 | 363 | div#filters.hidden { 364 | height: 0; 365 | margin: 0; 366 | opacity: 0; 367 | } 368 | 369 | div#filters input[type=radio] { 370 | /* hide the actual radio button */ 371 | -webkit-appearance: none; 372 | -moz-appearance: none; 373 | -ms-appearance: none; 374 | -o-appearance: none; 375 | appearance: none; 376 | } 377 | 378 | div#filters input[type=radio] + label { 379 | cursor: pointer; 380 | color: white; 381 | animation: fade-in 0.5s ease; 382 | } 383 | 384 | div#filters input[type=radio] + label:hover { 385 | text-decoration: underline; 386 | } 387 | 388 | div#filters input[type=radio]:checked + label { 389 | color: #661111; 390 | 
} 391 | 392 | div#filters input[type=radio]:checked + label:hover { 393 | text-decoration: none; 394 | cursor: default; 395 | } 396 | 397 | 398 | 399 | 400 | /* === ARTICLE === */ 401 | 402 | article { 403 | transition: all 0.2s; /* should be synchronised with filters transition */ 404 | 405 | background: white; 406 | border-radius: 0.3rem; 407 | padding: 1.5rem; 408 | 409 | box-sizing: border-box; /* allow 100% width + padding with not overflow */ 410 | width: 100%; 411 | } 412 | 413 | /* deal with Firefox quirk (bottom padding on main is being ignored) */ 414 | @-moz-document url-prefix() { 415 | #entries article:last-child { 416 | margin-bottom: 0.5rem; 417 | } 418 | } 419 | 420 | /* controls where the content appears */ 421 | article.full { 422 | /* take up all middle space and allow for scrolling */ 423 | flex-grow: 1; 424 | height: 100%; /* enables border all the way down (in tandem with article) */ 425 | overflow: auto; 426 | } 427 | 428 | 429 | /* === FOOTER === */ 430 | 431 | footer { 432 | animation: fade-in 1.5s ease; 433 | 434 | /* text */ 435 | text-align: center; 436 | 437 | /* box */ 438 | background: #BB5544; 439 | background-image: linear-gradient(to bottom, #BB5544, #CC6644); 440 | padding: 1.5rem 0; 441 | width: 100%; 442 | 443 | /* make sure search results are covered by shadow */ 444 | z-index: 1; 445 | 446 | /* shadow */ 447 | box-shadow: 0 0 1rem 0 rgba(0, 0, 0, 0.3); 448 | } 449 | 450 | nav { 451 | /* text-related */ 452 | font-size: 1.6rem; 453 | word-spacing: 0.4rem; 454 | line-height: 1; 455 | color: #FFBBBB; 456 | } 457 | 458 | nav > #script-changer { 459 | padding: 0.4rem; 460 | border: 1pt solid #CC6666; 461 | border-radius: 0.5rem; 462 | color: #FFBBBB; 463 | cursor: pointer; 464 | 465 | /* nav links shouldn't jump around when changing script */ 466 | display: inline-block; 467 | min-width: 6ch; 468 | 469 | /* fix for #14 (translation pop-up on Chrome mobile) */ 470 | user-drag: none; 471 | user-select: none; 472 | } 473 | 474 | nav > 
#script-changer:hover { 475 | text-decoration: none; 476 | color: white; 477 | border-color: white; 478 | } 479 | 480 | footer a { 481 | color: white; 482 | } 483 | 484 | footer a:hover { 485 | color: white; 486 | text-decoration: underline; 487 | } 488 | 489 | footer a.current-page, footer a.current-page:hover { 490 | color: #661111; 491 | text-decoration: none; 492 | } 493 | 494 | 495 | 496 | 497 | /* === ACTION CHOOSER === */ 498 | 499 | fieldset#actions { 500 | /* box */ 501 | padding: 1rem; 502 | width: 90%; 503 | max-width: 40rem; 504 | background: #BB5544; 505 | border: none; 506 | border-radius: 1rem; 507 | box-shadow: 0 1rem 2rem 0 rgba(0, 0, 0, 0.2), 508 | 0 1rem 2rem 0 rgba(0, 0, 0, 0.19), 509 | inset 0 0 1rem 0 rgba(0, 0, 0, 0.3); 510 | 511 | /* center vertically */ 512 | z-index: 2; 513 | position: fixed; 514 | top: 50%; /* using 50% looks off somehow...*/ 515 | left: 50%; 516 | transform: translate(-50%, -70%); 517 | -webkit-transform: translate(-50%, -70%); 518 | -moz-transform: translate(-50%, -70%); 519 | -o-transform: translate(-50%, -70%); 520 | -ms-transform: translate(-50%, -70%); 521 | 522 | /* expand to fit content 523 | https://teamtreehouse.com/community/how-can-i-make-my-divs-grow-wider-according-to-their-content */ 524 | -moz-box-sizing: border-box; 525 | -webkit-box-sizing: border-box; 526 | box-sizing: border-box; 527 | 528 | animation: fade-in 0.3s linear; 529 | } 530 | 531 | fieldset#actions * { 532 | color: #FFBBBB; 533 | } 534 | 535 | /* the action chooser h1 is (unlike other h1 elements) styled sans-serif */ 536 | fieldset#actions > legend { 537 | font-family: "Gill Sans", "Gill Sans MT", Calibri, "KaiTi", "楷体", STKaiti, "华文楷体", sans-serif; 538 | font-size: 2.2rem; 539 | text-align: center; 540 | 541 | /* positioning is a bit weird with this thing */ 542 | position: relative; 543 | top: 2.2rem; 544 | margin: 2.2rem 0; 545 | padding: 1rem 0; 546 | width: 100%; 547 | 548 | /* displays as a kind of HR */ 549 | border-bottom: 1px solid 
#CC6666; 550 | 551 | /* fake top border of the fieldset itself */ 552 | border-top: none; 553 | } 554 | 555 | fieldset#actions > ol { 556 | margin: 0; 557 | padding: 0 0 0 3rem; 558 | } 559 | 560 | fieldset#actions > ol > li > input[type=radio] { 561 | /* hide the actual radio button */ 562 | -webkit-appearance: none; 563 | -moz-appearance: none; 564 | -ms-appearance: none; 565 | -o-appearance: none; 566 | appearance: none; 567 | } 568 | 569 | fieldset#actions > ol > li > label { 570 | cursor: pointer; 571 | } 572 | 573 | /* Reveal that the options are also clickable */ 574 | fieldset#actions > ol > li > label:hover { 575 | text-decoration: underline; 576 | } 577 | 578 | /* Colour the currently checked button white */ 579 | fieldset#actions > ol > li > input[type=radio]:checked + label { 580 | color: white; 581 | } 582 | 583 | 584 | 585 | 586 | /* === DICTIONARY ENTRY === */ 587 | 588 | article.entry { 589 | display: flex; 590 | } 591 | 592 | article.entry h1 { 593 | writing-mode: vertical-lr; 594 | text-orientation: upright; 595 | font-size: 8vh; /* vh better supports small screens */ 596 | margin-right: 1.5rem; 597 | 598 | /* don't overflow onto usages */ 599 | white-space: nowrap; 600 | 601 | /* remove hidden left margin */ 602 | line-height: 1; 603 | } 604 | 605 | article.entry div.content { 606 | box-sizing: border-box; /* allow 100% height + padding with not overflow */ 607 | width: 100%; 608 | height: 100%; 609 | border-left: 2px solid #EEE; 610 | padding: 0 0 2rem 1.5rem; 611 | 612 | display: flex; 613 | flex-direction: column; 614 | 615 | /* TODO: should I find a way to make entire page scrollable */ 616 | overflow: auto; 617 | } 618 | 619 | section#usages { 620 | margin-bottom: auto; 621 | } 622 | 623 | section.details table { 624 | font-size: 1.2rem; 625 | margin-bottom: 0; 626 | } 627 | 628 | section.details table tbody > tr > td:first-child { 629 | white-space: nowrap; 630 | font-weight: 400; 631 | text-align: right; 632 | text-transform: uppercase; 
633 | color: #662222; 634 | background: #DD8888; 635 | } 636 | 637 | section.details table *:lang(zh) { 638 | font-size: 2rem; 639 | } 640 | 641 | section.details table td:last-child { 642 | width: 100%; 643 | } 644 | 645 | 646 | 647 | /* === DICTIONARY SEARCH RESULT === */ 648 | 649 | #entries article a { 650 | display: flex; 651 | flex-direction: row; 652 | align-items: stretch; 653 | height: 100%; 654 | } 655 | 656 | #entries article + article { 657 | margin-top: 0.5rem; 658 | } 659 | 660 | #entries h1 { 661 | writing-mode: vertical-lr; 662 | text-orientation: upright; 663 | white-space: nowrap; 664 | align-self: center; 665 | font-size: 3.2rem; 666 | margin: 0; 667 | padding-right: 1rem; 668 | } 669 | 670 | #entries article:hover { 671 | background: #EEFFFF; 672 | } 673 | 674 | #entries h1 + dl { 675 | padding-left: 1rem; 676 | border-left: 2px solid #EEE; 677 | list-style-type: none; 678 | padding: 0 0 0 1rem; 679 | margin: 0; 680 | } 681 | 682 | /* TODO: check this */ 683 | #entries span.pinyin { 684 | color: #999999; 685 | } 686 | 687 | #entries dl > dt { 688 | font-size: 1.8rem; 689 | } 690 | 691 | #entries dl > dd { 692 | font-size: 1.8rem; 693 | margin-top: 1rem; 694 | } 695 | 696 | .understated { 697 | color: #CCC; 698 | } 699 | 700 | .understated em { 701 | color: initial; 702 | font-style: normal; 703 | } 704 | -------------------------------------------------------------------------------- /resources/public/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/favicon-16x16.png -------------------------------------------------------------------------------- /resources/public/favicon-32x32.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/favicon-32x32.png -------------------------------------------------------------------------------- /resources/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/favicon.ico -------------------------------------------------------------------------------- /resources/public/favicon_generator.md: -------------------------------------------------------------------------------- 1 | # Your Favicon Package 2 | 3 | This package was generated with [RealFaviconGenerator](https://realfavicongenerator.net/) [v0.16](https://realfavicongenerator.net/change_log#v0.16) 4 | 5 | ## Install instructions 6 | 7 | To install this package: 8 | 9 | Extract this package in the root of your web site. If your site is http://www.example.com, you should be able to access a file named http://www.example.com/favicon.ico. 
10 | 11 | Insert the following code in the `head` section of your pages: 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *Optional* - Check your favicon with the [favicon checker](https://realfavicongenerator.net/favicon_checker) -------------------------------------------------------------------------------- /resources/public/html_code.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /resources/public/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/img/favicon.png -------------------------------------------------------------------------------- /resources/public/img/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 22 | 24 | 48 | 53 | 54 | 56 | 57 | 59 | image/svg+xml 60 | 62 | 63 | 64 | 65 | 66 | 71 | 82 | · 98 | 104 | · 120 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /resources/public/img/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 45 | 50 | 51 | 53 | 54 | 56 | image/svg+xml 57 | 59 | 60 | 61 | 62 | 63 | 68 | sino·study 85 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /resources/public/img/logo_dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 45 | 50 | 51 | 53 | 54 | 56 | image/svg+xml 57 | 59 | 60 | 61 | 62 | 63 | 68 | sino·study 85 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /resources/public/img/logo_dark_min.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 45 | 50 | 51 | 53 | 54 | 56 | image/svg+xml 57 | 59 | 60 | 61 | 62 | 63 | 68 | 73 | 77 | 81 | 85 | 89 | 93 | 97 | 101 | 105 | 109 | 113 | 114 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /resources/public/img/logo_min.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 45 | 50 | 51 | 53 | 54 | 56 | image/svg+xml 57 | 59 | 60 | 61 | 62 | 63 | 68 | 73 | 77 | 81 | 85 | 89 | 93 | 97 | 101 | 105 | 109 | 113 | 114 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /resources/public/img/search.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 39 | 41 | 42 | 44 | image/svg+xml 45 | 47 | 48 | 49 | 50 | 51 | 56 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /resources/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | sino·study 17 | 18 | 19 | 20 |
21 | 22 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /resources/public/mstile-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/mstile-150x150.png -------------------------------------------------------------------------------- /resources/public/safari-pinned-tab.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /resources/public/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sino\u00b7study", 3 | "short_name": "sino\u00b7study", 4 | "icons": [ 5 | { 6 | "src": "/android-chrome-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png" 9 | }, 10 | { 11 | "src": "/android-chrome-512x512.png", 12 | "sizes": "512x512", 13 | "type": "image/png" 14 | } 15 | ], 16 | "theme_color": "#fcfcfb", 17 | "background_color": "#fcfcfb", 18 | "start_url": "http://sino.study", 19 | "display": "standalone" 20 | } 21 | -------------------------------------------------------------------------------- /src/sinostudy/cofx.cljs: -------------------------------------------------------------------------------- 1 | (ns sinostudy.cofx 2 | (:require [clojure.string :as str] 3 | [re-frame.core :as rf])) 4 | 5 | (rf/reg-cofx 6 | ::now 7 | (fn [cofx _] 8 | (assoc cofx ::now (js/Date.)))) 9 | 10 | ;; Retrieves scroll states for all tags defined by the selector string. 11 | ;; In the current design, the window/document itself is no longer scrollable, 12 | ;; so there is no need to retrieve its scroll state. 
;; Coeffect that records the scroll offsets of the elements matched by the
;; selector string below. Only elements that are actually scrolled (non-zero
;; horizontal or vertical offset) are included.
;;
;; The value stored under ::scroll-state is a map of
;;   selector string -> [scrollLeft scrollTop]
(rf/reg-cofx
  ::scroll-state
  (fn [cofx _]
    (let [selector          "*[id], main, body"
          elements          (array-seq (js/document.querySelectorAll selector))

          ;; Builds "TAG#id" for elements with an id and plain "TAG" otherwise.
          ;; The DOM reports a missing id as the empty string (never nil), so
          ;; blank parts must be dropped; filtering on nil alone produced
          ;; dangling selectors such as "MAIN#" for id-less elements.
          element->selector (fn [element]
                              (->> [(.-tagName element) (.-id element)]
                                   (remove str/blank?)
                                   (str/join "#")))

          scroll-state      (into {} (for [element elements
                                           :let [x (.-scrollLeft element)
                                                 y (.-scrollTop element)]
                                           :when (or (pos? x) (pos? y))]
                                       [(element->selector element) [x y]]))]
      (assoc cofx ::scroll-state scroll-state))))
;; Mounts the app and wires up the browser-level behaviour it needs.
;; The steps run in order, each for its side effect:
;;   1. clear the re-frame subscription cache,
;;   2. render the root view into the #app element,
;;   3. dispatch the route matching the current address bar path,
;;   4. focus the #input-field element,
;;   5. install a document-wide keydown handler.
(defn mount-root []
  (rf/clear-subscription-cache!)
  (reagent/render [views/app] (.getElementById js/document "app"))

  ;; Start the CLJS app from the current page in the address bar.
  ;; The routing mostly takes place on the frontend,
  ;; so the app needs to orient itself on hard page loads.
  (let [current-page (-> js/window .-location .-pathname)]
    (secretary/dispatch! current-page))

  ;; The input bar needs to have immediate focus on page load.
  ;; NOTE(review): assumes #input-field exists once the root view has been
  ;; rendered; a missing element would make .focus fail -- confirm.
  (.focus (.getElementById js/document "input-field"))

  ;; Intercepts all key presses in the document.
  ;; Only differs from normal operation in the action-chooser mode, i.e. while
  ;; the ::subs/actions subscription holds a truthy value: the default browser
  ;; behaviour is then suppressed and the pressed key is forwarded to the
  ;; ::actions/on-key-down event instead.
  ;; This is important, since calling .preventDefault on all key presses
  ;; is a recipe for creating many bugs -- now and down the line, too.
  (set! (.-onkeydown js/document)
        (fn [e] (when @(rf/subscribe [::subs/actions])
                  (.preventDefault e)
                  (rf/dispatch [::actions/on-key-down (.-key e)])))))
(defn- timestamp-comparator
  "Comparator that orders items by the :ts value found in their metadata.

  Items that are `=` always compare as 0, whatever their timestamps, which is
  what gives a `sorted-set-by` built on this comparator its set semantics.
  Note that two distinct items carrying the same :ts also compare as 0."
  [x y]
  (if-not (= x y)
    (compare (-> x meta :ts)
             (-> y meta :ts))
    0))
This is another performance optimisation. 85 | :queue (sorted-set-by timestamp-comparator) 86 | 87 | ;; Result filters are a mapping from terms to user-selected result filters. 88 | ;; These filters are the ones that control whether we're searching for 89 | ;; plain Pinyin, English, the official Pinyin with diacritics, or the popular 90 | ;; online version where tone diacritics have been replaced by digits. 91 | ;; Terms where the user never deviated from the default choice of filter 92 | ;; do not appear in this map, only the ones that were actively selected. 93 | ;; These are (in a similar fashion to :scroll-states) used to recreate UI 94 | ;; state when navigating back in history during the browsing session. 95 | :result-filters {} 96 | 97 | ;; A stack of maps containing evaluations, i.e. maps of input query, output 98 | ;; actions, and timestamp. Basically used to memoise query input to its 99 | ;; results to speed up recollection. 100 | :evaluations '() 101 | 102 | ;; Holds a record of the query content. Not used for much at the moment, but 103 | ;; be used to see the history of backend requests and whether they were 104 | ;; successful or not. 105 | :queries '() 106 | 107 | ;; The preferred script is registered here and this will simply use the 108 | ;; selected script over the other whenever there is an option of both in the 109 | ;; UI. This currently doesn't change the term *itself* on term pages, as this 110 | ;; would also require mutating the URL whenever the user switches the script. 111 | :script :simplified 112 | 113 | ;; Scroll states is a in-memory collection of the scroll state of the page 114 | ;; whenever a new page is reached. Since this is an SPA, the browser doesn't 115 | ;; necessarily remember how far along the page was scrolled at a specific 116 | ;; point in the browsing history. To remedy this, the states here can be 117 | ;; recreated. This is then tied in with the page navigation mechanism. 
(defn refr->m
  "Parse an embedded reference string into a map.

  The input is split at the square brackets into a hanzi part and a Pinyin
  part, e.g. \"個|个[ge4]\". The hanzi part may hold two forms separated by a
  pipe (the first is used as :traditional, the second as :simplified); when
  only one form is present it is used for both. The Pinyin part is split on
  spaces into a vector of syllables.

  Returns {:traditional ..., :simplified ..., :pinyin [...]}."
  [refr]
  (let [[hanzi-part pinyin-part] (str/split refr #"\[|\]")
        [first-form second-form] (str/split hanzi-part #"\|")
        syllables                (str/split pinyin-part #" ")]
    {:traditional first-form
     :simplified  (or second-form first-form)
     :pinyin      syllables}))
(defn add-hanzi*
  "Store entry v under key k in the hanzi dict, merging it with whatever entry
  is already there.

  When an entry exists for k, the result is the old entry updated as follows:
    * :scripts and :classifiers are the set union of old and new,
    * :uses and :variations are merged map-wise, unioning the sets per key,
    * :frequency, :decomposition, :etymology and :radical are taken from v.
  Any of these whose resulting value is nil is left as it was in the old
  entry. When no entry exists for k, v is inserted as is."
  [dict k v]
  (let [existing (get dict k)]
    (if-not existing
      (assoc dict k v)
      (let [union-of   (fn [field]
                         (set/union (field existing) (field v)))
            merge-of   (fn [field]
                         (merge-with set/union (field existing) (field v)))
            candidates {:scripts       (union-of :scripts)
                        :classifiers   (union-of :classifiers)
                        :uses          (merge-of :uses)
                        :variations    (merge-of :variations)
                        :frequency     (:frequency v)
                        :decomposition (:decomposition v)
                        :etymology     (:etymology v)
                        :radical       (:radical v)}
            ;; only overwrite a field when there is an actual value for it
            keep-some  (fn [entry field value]
                         (if value
                           (assoc entry field value)
                           entry))]
        (assoc dict k (reduce-kv keep-some existing candidates))))))
(defn add
  "Put the set v into dict under key k.
  If k already holds a value, the stored value becomes the union of that
  value and v; otherwise v is stored unchanged."
  [dict k v]
  (let [merge-in (fn [existing]
                   (if existing
                     (set/union existing v)
                     v))]
    (update dict k merge-in)))
(defn- english-relevance-score
  "Score how relevant the definition `use` is for the search `term`.
  Both strings are lower-cased before comparing.

  Returns 0 when `use` does not contain `term`. Otherwise returns the largest
  of these term-length : text-length ratios:
    * against the full definition,
    * against the definition with a leading/trailing explanatory parenthesis
      collapsed into the single placeholder character \"_\",
    * against the definition with a leading \"to \" removed (the common
      marker of verblikes).
  The two adjusted ratios only count when the adjusted text still contains
  the term and is not reduced to nothing but the placeholder / empty string."
  [term use]
  (let [needle   (str/lower-case term)
        haystack (str/lower-case use)]
    (if-not (str/includes? haystack needle)
      0
      (let [placeholder "_"
            sans-expl   (str/replace haystack expl placeholder)
            sans-to     (str/replace haystack #"^to " "")
            ratio       (fn [text]
                          (/ (count needle) (count text)))
            ;; ratio against an adjusted text, or 0 when the adjustment left
            ;; nothing meaningful to compare against
            ratio-if    (fn [text degenerate]
                          (if (and (str/includes? text needle)
                                   (not= text degenerate))
                            (ratio text)
                            0))]
        (max (ratio haystack)
             (ratio-if sans-expl placeholder)
             (ratio-if sans-to ""))))))
;;; TODO: find proper thresholds for labels
(defn frequency-label
  "Get a human-readable label for a given word frequency.

  Returns
    :high   when frequency is above 0.01,
    :medium when frequency is above 0.001 and at most 0.01,
    :low    otherwise."
  [frequency]
  (cond
    (> frequency 0.01)  :high
    ;; The upper bound is already excluded by the clause above. Testing it
    ;; again with a strict comparison made a frequency of exactly 0.01 fall
    ;; through to :low.
    (> frequency 0.001) :medium
    :else               :low))
(defn make-report
  "Produce rudimentary statistics about the given dict: the number of keys in
  each of its sub-dictionaries (:hanzi, :english, :pinyin, :pinyin+digits and
  :pinyin+diacritics). A missing sub-dictionary counts as 0."
  [dict]
  (let [stat->dict-type {:entry-count             :hanzi
                         :english-count           :english
                         :pinyin-count            :pinyin
                         :pinyin+digits-count     :pinyin+digits
                         :pinyin+diacritics-count :pinyin+diacritics}
        tally           (fn [report stat dict-type]
                          (assoc report stat (count (keys (get dict dict-type)))))]
    (reduce-kv tally {} stat->dict-type)))
([dict term limit]
   (let [term*       (pinyin-key term)                      ; unspaced
         look-up*    (fn [dict-type word] (-> dict (get dict-type) (get word)))
         ;; With a limit, a dict type outside the limit resolves to nil and
         ;; therefore yields no results from look-up*.
         limited     (fn [dict-type] (if limit (get limit dict-type) dict-type))
         ;; Pinyin/English tables hold words; resolve them to :hanzi entries.
         get-entries (fn [words] (set (map #(look-up* :hanzi %) words)))
         hanzi       (look-up* (limited :hanzi) term)
         pinyin      (set/union (look-up* (limited :pinyin) term)
                                (look-up* (limited :pinyin) term*))
         digits      (set/union (look-up* (limited :pinyin+digits) term)
                                (look-up* (limited :pinyin+digits) term*))
         diacritics  (set/union (look-up* (limited :pinyin+diacritics) term)
                                (look-up* (limited :pinyin+diacritics) term*))
         english     (look-up* (limited :english) (str/lower-case term))
         result      (cond-> {:term term}
                             hanzi (assoc :hanzi hanzi)
                             pinyin (assoc :pinyin (get-entries pinyin))
                             digits (assoc :pinyin+digits (get-entries digits))
                             diacritics (assoc :pinyin+diacritics (get-entries diacritics))
                             english (assoc :english (get-entries english)))]
     ;; A result holding nothing but the term itself means "not found".
     (if (= result {:term term})
       nil
       result)))
  ([dict word]
   (look-up dict word nil)))


;;;; POST-PROCESSING DICTIONARY LOOK-UP RESULTS

(defn- safe-comparator
  "Create a comparator for sorting that will not lose items by accident.
  When fn1 cannot establish an ordering between two elements, fn2 steps in.
  Based on example at: https://clojuredocs.org/clojure.core/sorted-set-by"
  [fn1 fn2]
  (fn [x y]
    (let [comparison (compare (fn1 x) (fn1 y))]
      (if (not= comparison 0)
        comparison
        (compare (fn2 x) (fn2 y))))))

(defn defs-containing-term
  "Only keep definitions that contain the given term.
  The term is matched case-insensitively as a whole word, i.e. delimited by
  the start/end of the definition, a space or common punctuation.
  Regex metacharacters in the term are escaped so that it is always matched
  literally (e.g. \"c++\" or \"(\" no longer break the pattern)."
  [term definitions]
  (let [;; A replacement fn is used because it behaves the same in CLJ and CLJS.
        quoted     (str/replace term #"[.*+?^${}()|\[\]\\]" #(str "\\" %))
        term-re    (re-pattern (str "(?i)(^|[ (\"])" quoted "($|[ ,;.'!?)\"])"))
        with-term? (fn [definition]
                     (re-find term-re (remove-embedded definition)))]
    (filter with-term?
definitions)))

(defn filter-defs
  "Remove definitions from entries if they do not contain the given term.
  Used to filter results by an English search term.
  Uses (Pinyin -> definitions pairs) left without any definition are dropped."
  [term entries]
  (let [relevant-defs (fn [[pinyin definitions]]
                        [pinyin (defs-containing-term term definitions)])
        non-empty     (comp seq second)]
    (for [entry entries]
      (assoc entry :uses (->> (:uses entry)
                              (map relevant-defs)
                              (filter non-empty)
                              (into {}))))))

(defn filter-uses
  "Remove uses from entries if the Pinyin does not match the given term.
  Used to filter results by a Pinyin search term.
  An optional normalisation function f can be supplied to convert the uses
  (normally in pinyin+digits format) to a Pinyin format matching the term."
  ([term entries f]
   (let [;; The key of the search term is the same for every use of every
         ;; entry, so it is computed once rather than per comparison.
         term-key          (pinyin-key term)
         use-matches-term? (comp (fn [s] (= s term-key))
                                 pinyin-key
                                 (if f f identity)
                                 first)]
     (for [entry entries
           :let [uses (:uses entry)]]
       (assoc entry :uses (into {} (filter use-matches-term? uses))))))
  ([term entries]
   (filter-uses term entries nil)))

(defn reduce-result
  "Reduce the content of a dictionary look-up result.
  This removes irrelevant data from the result relative to the search term,
  e.g. removes definitions that do not match the search term."
  [result]
  (let [term       (:term result)
        entry      (:hanzi result)                          ; dictionary entry, not a sequence!
        pinyin     (:pinyin result)
        digits     (:pinyin+digits result)
        diacritics (:pinyin+diacritics result)
        english    (:english result)]
    ;; Reduces to a single dictionary entry when applicable, i.e. when the
    ;; search term consists of hanzi and happened to match an entry directly.
    ;; Otherwise, returns the search results for the given search term.
    ;; Note: `hanzi` can only be a set of length 1 or nil!
(or entry
        (cond-> result

                pinyin
                (assoc :pinyin
                       (filter-uses term pinyin p/no-digits))

                digits
                (assoc :pinyin+digits
                       (filter-uses term digits))

                diacritics
                (assoc :pinyin+diacritics
                       (filter-uses term diacritics p/digits->diacritics))))))

;; TODO: disabled for now, re-enable when more intelligent (issue #37)
;english
;(assoc :english
;       (filter-defs term english)))))

(defn sort-result
  "Sort the content of a dictionary look-up result.
  This sorts the result relative to the search term,
  e.g. English word results are sorted according to relevance.
  Only the :english results are actually re-ordered at the moment; the Pinyin
  result types are passed through unchanged."
  [result]
  (let [relevance  (memoize (partial english-relevance (:term result)))
        ;; Negated so that the most relevant entries come first; ties are
        ;; broken on :term so the sorted set does not drop equal-scoring items.
        relevance* (comp - (safe-comparator relevance :term))
        sorted     (fn [f coll] (apply sorted-set-by f coll))
        pinyin     (:pinyin result)
        digits     (:pinyin+digits result)
        diacritics (:pinyin+diacritics result)
        english    (:english result)]
    (cond-> result
            ;pinyin (assoc :pinyin (sorted > pinyin))
            ;digits (assoc :pinyin+digits (sorted > digits))
            ;diacritics (assoc :pinyin+diacritics (sorted > diacritics))
            ;; TODO: sort Pinyin properly too
            pinyin (assoc :pinyin pinyin)
            digits (assoc :pinyin+digits digits)
            diacritics (assoc :pinyin+diacritics diacritics)
            english (assoc :english (sorted relevance* english)))))
--------------------------------------------------------------------------------
/src/sinostudy/dictionary/data.cljc:
--------------------------------------------------------------------------------
(ns sinostudy.dictionary.data)

;; Set of lower-level "noise" words: common English function words,
;; contractions, CC-CEDICT boilerplate terms, digits and single letters.
(def stopwords
  #{"a" "about" "above" "across" "after" "afterwards" "again" "against"
    "all" "almost" "alone" "along" "already" "also" "although" "always" "am"
    "among" "amongst" "amount" "an" "and" "another" "any" "anyhow"
    "anyone" "anything" "anyway" "anywhere" "are" "around" "as" "at" "back"
    "be" "became" "because" "become" "becomes" "becoming" "been" "before"
    "beforehand" "behind" "being" "below" "beside" "besides" "between"
    "beyond" "bill" "both" "bottom" "but" "by" "call" "can" "cannot" "cant" "co"
    "con" "could" "couldnt" "cry" "de" "describe" "detail" "do" "does" "done"
    "down" "due" "during" "each" "eg" "eight" "either" "eleven" "else"
    "elsewhere" "empty" "enough" "etc" "even" "ever" "every" "everyone"
    ;; "fifty" was misspelled as "fify", so the real word was never excluded
    ;; (unlike its neighbours "fifteen", "forty" and "sixty").
    "everything" "everywhere" "except" "few" "fifteen" "fifty" "fill" "find"
    "fire" "first" "five" "for" "former" "formerly" "forty" "found" "four"
    "from" "front" "full" "further" "get" "give" "go" "had" "has" "hasnt" "have"
    "he" "hence" "her" "here" "hereafter" "hereby" "herein" "hereupon" "hers"
    "herself" "him" "himself" "his" "how" "however" "hundred" "i" "ie" "if" "in"
    "inc" "indeed" "interest" "into" "is" "it" "its" "itself" "keep" "last"
    "latter" "latterly" "least" "less" "ltd" "made" "many" "may" "me"
    "meanwhile" "might" "mill" "mine" "more" "moreover" "most" "mostly" "move"
    "much" "must" "my" "myself" "name" "namely" "neither" "never" "nevertheless"
    "next" "nine" "no" "nobody" "none" "noone" "nor" "not" "nothing" "now"
    "nowhere" "of" "off" "often" "on" "once" "one" "only" "onto" "or" "other"
    "others" "otherwise" "our" "ours" "ourselves" "out" "over" "own" "part"
    "per" "perhaps" "please" "put" "rather" "re" "same" "see" "seem" "seemed"
    "seeming" "seems" "serious" "several" "she" "should" "show" "side" "since"
    "sincere" "six" "sixty" "so" "some" "somehow" "someone" "something"
    "sometime" "sometimes" "somewhere" "still" "such" "system" "take" "ten"
    "than" "that" "the" "their" "them" "themselves" "then" "thence" "there"
    "thereafter" "thereby" "therefore" "therein" "thereupon" "these" "they"
    "thick" "thin" "third" "this" "those" "though" "three" "through"
    "throughout" "thru" "thus" "to" "together" "too" "top" "toward" "towards"
    "twelve" "twenty" "two" "un" "under" "until" "up" "upon" "us" "very" "via"
    "was" "we" "well" "were" "what" "whatever" "when" "whence" "whenever"
    "where" "whereafter" "whereas" "whereby" "wherein" "whereupon" "wherever"
    "whether" "which" "while" "whither" "who" "whoever" "whole" "whom" "whose"
    "why" "will" "with" "within" "without" "would" "yet" "you" "your" "yours"
    "yourself" "yourselves"

    ;; Common contractions with apostrophe (and a few without)
    ;; https://en.wikipedia.org/wiki/Wikipedia:List_of_English_contractions
    "ain't" "aren't" "can't" "could've" "couldn't" "daren't" "daresn't"
    "dasn't" "didn't" "doesn't" "don't" "e'er" "everyone's" "finna" "gimme"
    "gonna" "gotta" "hadn't" "hasn't" "haven't" "he'd" "he'll" "he's" "he've"
    "how'd" "how'll" "how're" "how's" "I'd" "I'll" "I'm" "I'm'a" "I'm'o"
    "I've" "isn't" "it'd" "it'll" "it's" "let's" "ma'am" "mayn't" "may've"
    "mightn't" "might've" "mustn't" "mustn't've" "must've" "needn't" "ne'er"
    "o'clock" "o'er" "ol'" "oughtn't" "'s" "shan't" "she'd" "she'll" "she's"
    "should've" "shouldn't" "somebody's" "someone's" "something's" "that'll"
    "that're" "that's" "that'd" "there'd" "there'll" "there're" "there's"
    "these're" "they'd" "they'll" "they're" "they've" "this's" "those're"
    "'tis" "'twas" "wasn't" "we'd" "we'd've" "we'll" "we're" "we've" "weren't"
    "what'd" "what'll" "what're" "what's" "what've" "when's" "where'd"
    "where're" "where's" "where've" "which's" "who'd" "who'd've" "who'll"
    "who're" "who's" "who've" "why'd" "why're" "why's" "won't" "would've"
    "wouldn't" "y'all" "y'all'd've" "yesn't" "you'd" "you'll" "you're"
    "you've" "noun's" "noun(s)'re"

    ;; Special cases (common throughout CC-CEDICT definitions)
    "variant" "loanword" "cf" "lit" "tw" "pr" "abbr" "taiwan" "radical" "kangxi"
    "arch" "archaic" "...er" "written"

    ;; Numbers are excluded
    "1" "2" "3" "4" "5" "6" "7" "8" "9" "0"

    ;; The entire English alphabet is excluded ("a" and "i" found above)
    ;"a"
    "b" "c" "d" "e" "f" "g" "h"
    ;"i"
    "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"})
--------------------------------------------------------------------------------
/src/sinostudy/dictionary/embed.cljc:
--------------------------------------------------------------------------------
(ns sinostudy.dictionary.embed
  (:require [clojure.string :as str]
            [sinostudy.pinyin.data :as pd]))

;;;; CC-CEDICT EMBEDDINGS

(def refr
  "A pattern used in CC-CEDICT to embed a hanzi reference with Pinyin."
  #"[^ ,:\[a-zA-Z0-9]+\[[^\]]+\]+")

;; CLJS regex seems to have some issues with doing (str pp/hanzi-pattern),
;; so I've copied over whole implementation.
(def hanzi
  "A pattern used in CC-CEDICT to embed a hanzi reference (no Pinyin).
  Built from the character ranges in pd/hanzi-unicode: two hanzi runs with an
  optional | between them."
  (let [hanzi+ (str "[" (str/join (map str (vals pd/hanzi-unicode))) "]+")]
    (re-pattern (str hanzi+ "\\|?" hanzi+))))

(def pinyin
  "A pattern used in CC-CEDICT to embed a Pinyin pronunciation."
#"\[[a-zA-Z0-9 ]+\]+")
--------------------------------------------------------------------------------
/src/sinostudy/dictionary/load.clj:
--------------------------------------------------------------------------------
(ns sinostudy.dictionary.load
  (:require [clojure.java.io :as io]
            [clojure.data.csv :as csv]
            [clojure.string :as str]
            [clj-json.core :as json]
            [sinostudy.dictionary.core :as d]
            [sinostudy.pinyin.core :as p]
            [sinostudy.pinyin.eval :as pe]))

;;;; CC-CEDICT

(defn- u:->umlaut
  "Replace the CC-CEDICT substitute u: with the proper Pinyin ü."
  [pinyin]
  (str/replace pinyin "u:" "ü"))

(defn- join-abbr
  "Join the uppercase letters in a CC-CEDICT Pinyin string into blocks.
  A run of two or more space-separated single capitals (followed by a space or
  the end of the string) has its inner spaces removed, e.g. \"A B C\" -> \"ABC\"."
  [pinyin]
  (let [abbr-letters  #"([A-Z]( [A-Z])+)( |$)"
        ;; Group 1 is the spaced-out run, group 3 the trailing delimiter,
        ;; which is re-appended untouched.
        remove-spaces #(str (str/replace (% 1) " " "") (% 3))]
    (str/replace pinyin abbr-letters remove-spaces)))

(defn neutral-as-0
  "Convert the neutral tone digits (represented as 5 in CC-CEDICT) to 0.
  This ensures that the Pinyin strings are alphabetically sortable.
  Strings that are not recognised as Pinyin with tone digits are returned as is."
  [s]
  (if (pe/pinyin-block+digits? s)
    (str/replace s "5" "0")
    s))

(defn split-defs
  "Split the CC-CEDICT definition string into separate, unique parts.
  The parts are separated by / in the source; the result is a set."
  [definition]
  (set (str/split definition #"/")))

(defn line->cedict-listing
  "Extract the constituents of a line in a CC-CEDICT dictionary file.
  Returns a map representation suitable for use as a dictionary entry."
[line]
  ;; Line format: TRADITIONAL SIMPLIFIED [pin1 yin1] /definition 1/definition 2/
  (let [pattern #"^([^ ]+) ([^ ]+) \[([^]]+)\] /(.+)/"
        [_ trad simp pinyin defs :as entry] (re-matches pattern line)]
    ;; Lines that do not match the format yield nil.
    (when entry
      (let [pinyin* (u:->umlaut (neutral-as-0 pinyin))]
        {:traditional           trad
         :simplified            simp
         :pinyin                (join-abbr pinyin*)
         :pinyin-key            (d/pinyin-key (str/replace pinyin* #"\d" ""))
         :pinyin+digits-key     (d/pinyin-key pinyin*)
         :pinyin+diacritics-key (d/pinyin-key (p/digits->diacritics pinyin*))
         :definitions           (split-defs defs)}))))

(defn load-cedict
  "Load the listings of a CC-CEDICT dictionary file into Clojure maps.
  Comment lines (starting with #) are skipped, as are lines that cannot be
  parsed as a listing, so the result never contains nil."
  [file]
  (with-open [reader (io/reader file)]
    ;; The lines are realised before the reader closes; parsing may stay lazy.
    (->> (doall (line-seq reader))
         (remove #(str/starts-with? % "#"))
         ;; keep instead of map: line->cedict-listing returns nil for
         ;; malformed lines and those must not reach the dictionary builder.
         (keep line->cedict-listing))))


;;;; WORD FREQUENCY

(defn line->freq-listing
  "Extract the constituents of a line in a word frequency file.
  The line must consist of exactly three space-separated fields; the second is
  parsed as the frequency and the third is the word (the first is ignored).
  Returns a map with :frequency and :word, or nil if the line does not match."
  [line]
  (let [re #"^([^ ]+) ([^ ]+) ([^ ]+)"
        [_ _ freq word :as entry] (re-matches re line)]
    (when entry
      {:frequency (Double/parseDouble freq)
       :word      word})))

(defn normalise
  "Normalise the frequency of a freq-listing by dividing it by max-freq."
  [max-freq freq-listing]
  (assoc freq-listing :frequency (/ (:frequency freq-listing)
                                    max-freq)))

(defn load-freq-dict
  "Load the listings of 1 or more frequency files into a Clojure map."
([file]
   (with-open [reader (io/reader file)]
     (let [;; Only lines starting with a rank number are frequency listings.
           raw-listings (->> (doall (line-seq reader))
                             (filter #(re-find #"^\d+ " %))
                             (map line->freq-listing)
                             (remove nil?))
           ;; The first listing is used as the maximum for normalisation.
           ;; NOTE(review): presumably the files are sorted by descending
           ;; frequency -- confirm against the data files.
           max-freq     (:frequency (first raw-listings))]
       (->> raw-listings
            (map (partial normalise max-freq))
            (reduce #(assoc %1 (:word %2) (:frequency %2)) {})))))
  ([file & files]
   (let [m  (load-freq-dict file)
         ms (map load-freq-dict files)]
     ;; Words found in more than one file get the mean of the merged values.
     (reduce (partial merge-with #(/ (+ %1 %2) 2)) m ms))))


;;;; CHARACTER COMPOSITION, ETYMOLOGY, ETC.

(defn load-makemeahanzi
  "Load the listings of a makemeahanzi file into a Clojure map.
  Every line is parsed as a JSON object; the result maps the value of its
  \"character\" field to the parsed object."
  [file]
  (with-open [reader (io/reader file)]
    (let [raw-listings (->> (doall (line-seq reader))
                            (map json/parse-string))]
      (reduce #(assoc %1 (get %2 "character") %2) {} raw-listings))))


;;;; EXAMPLE SENTENCES + THEIR RELATIONS AND METADATA
(defn load-sentences
  "Load tab-separated sentence and link files and return some counts.
  Only the first three columns of each sentence row are kept, and only rows
  whose second column is \"eng\" or \"cmn\". Links are kept when their second
  column is the id of a \"cmn\" sentence."
  [sentences-file links-file]
  (with-open [sentences-reader (io/reader sentences-file)
              links-reader     (io/reader links-file)]
    (let [entries (->> (csv/read-csv sentences-reader :separator \tab :quote \^)
                       ;(take-nth 1000) ; TODO: remove
                       (map (partial take 3))
                       (filter (comp #{"eng" "cmn"} second))
                       (doall))

          ;; We only want to keep stuff around that is present in both eng/cmn.
          ;; The set must hold the ids (first column), not the whole rows:
          ;; it is used below to test the id column of each link, and a set
          ;; of rows would never contain an id string.
          cmn-ids (->> entries
                       (filter (comp #{"cmn"} second))
                       (map first)
                       (set))

          ;; It seems like the links in this dataset include both directions.
          links   (->> (csv/read-csv links-reader :separator \tab)
                       (filter (comp cmn-ids second))
                       (doall))] ;TODO: first, second?
{:entries (count entries)
       :cmn-ids (count cmn-ids)
       :links   (count links)})))


(defn load-test
  "Run load-sentences on the tatoeba files under ~/Code/sinostudy-data."
  []
  (load-sentences
    (str (System/getProperty "user.home") "/" "Code/sinostudy-data/"
         "tatoeba/sentences_detailed.csv")
    (str (System/getProperty "user.home") "/" "Code/sinostudy-data/"
         "tatoeba/links.csv")))

;;;; FULL DICTIONARY

(defn- in-home
  "Expands to the current user's home directory + s."
  [s]
  (str (System/getProperty "user.home") "/" s))

;; Note: dict compilation requires the sinostudy-data git repo to be located in:
;;   ~/Code/sinostudy-data
(defn load-dict
  "Build the full dictionary from the CC-CEDICT file, the two word frequency
  files and the makemeahanzi file found in the sinostudy-data directory."
  []
  (let [data         #(in-home (str "Code/sinostudy-data/" %))
        listings     (load-cedict
                       (data "cedict_ts.u8"))
        freq-dict    (load-freq-dict
                       (data "frequency/internet-zh.num.txt")
                       (data "frequency/giga-zh.num.txt"))
        makemeahanzi (load-makemeahanzi
                       (data "makemeahanzi/dictionary.txt"))]
    (d/create-dict listings freq-dict makemeahanzi)))
--------------------------------------------------------------------------------
/src/sinostudy/events/actions.cljs:
--------------------------------------------------------------------------------
(ns sinostudy.events.actions
  "For all events relating to actions triggered through the text input field,
  including displaying and navigating the `action-chooser`."
  (:require [re-frame.core :as rf]
            [sinostudy.pinyin.core :as p]
            [sinostudy.cofx :as cofx]
            [sinostudy.fx :as fx]))

;; Only dispatched when the action-chooser is open.
;; Escape cancels, Enter picks the checked action, a digit picks the action
;; with that (1-based) number and the arrow keys move the checked action.
(rf/reg-event-fx
  ::on-key-down
  (fn [{:keys [db] :as cofx} [_ key]]
    (let [{:keys [actions checked-action]} db
          next?      (fn [k] (contains? #{"ArrowRight" "ArrowDown"} k))
          prev?      (fn [k] (contains? #{"ArrowLeft" "ArrowUp"} k))
          ;; True when the key parses to a number between 1 and the number of
          ;; available actions (inclusive).
          valid-num? (fn [k] (let [num (js/parseInt k)]
                               (and (int?
num)
                                    (< 0 num (inc (count actions))))))]
      ;; Every branch returns an effect map instead of calling rf/dispatch
      ;; directly, which keeps the handler free of side effects like the
      ;; other event handlers in this namespace. Keys that match no branch
      ;; produce no effects.
      (cond
        (= "Escape" key)
        {:dispatch [::choose-action [::close-action-chooser]]}

        (= "Enter" key)
        {:dispatch [::choose-action (nth actions checked-action)]}

        ;; Number keys are 1-based, the actions vector is 0-based.
        (valid-num? key)
        (let [action (nth actions (dec (js/parseInt key)))]
          {:dispatch [::choose-action action]})

        ;; Starts from beginning when upper bound is crossed.
        (next? key)
        (let [bound (dec (count actions))
              n     (if (< checked-action bound)
                      (inc checked-action)
                      0)]
          {:dispatch [::check-action n]})

        ;; Goes to last action when lower bound is crossed.
        (prev? key)
        (let [n (if (> checked-action 0)
                  (dec checked-action)
                  (dec (count actions)))]
          {:dispatch [::check-action n]})))))

;; Opens the action-chooser with the actions of the latest evaluation plus a
;; trailing cancel action; the first action starts out checked.
(rf/reg-event-fx
  ::open-action-chooser
  [(rf/inject-cofx ::cofx/active-element)]
  (fn [{:keys [db ::cofx/active-element] :as cofx} _]
    (let [actions (:actions (first (:evaluations db)))]
      ;; Firefox won't get keydown events without removing focus from the input
      {::fx/blur active-element
       :db       (-> db
                     (assoc :checked-action 0)
                     (assoc :actions (conj actions [::close-action-chooser])))})))

;; Closes the action-chooser by clearing its list of actions.
(rf/reg-event-db
  ::close-action-chooser
  (fn [db _]
    (assoc db :actions nil)))

;; Marks the action at index n as the checked one.
(rf/reg-event-db
  ::check-action
  (fn [db [_ n]]
    (assoc db :checked-action n)))

;; Dispatched by user selecting an action in the action-chooser.
;; ::close-action-chooser (= cancel) is a special action (doesn't clear input).
(rf/reg-event-fx
  ::choose-action
  (fn [_ [_ action]]
    ;; The chooser is always closed first. Cancelling hands focus back to the
    ;; input field; any other action is dispatched as the follow-up event.
    (if (= [::close-action-chooser] action)
      {:dispatch-n [[::close-action-chooser]
                    [::regain-input-focus]]}
      {:dispatch-n [[::close-action-chooser]
                    action]})))

;; TODO: figure out a better way to regain focus for previously disabled field
;; Focuses the element with id "input-field" after a delay of 100 (see the
;; ::fx/set-focus effect, which passes the delay to js/setTimeout).
(rf/reg-event-fx
  ::regain-input-focus
  (fn [_ _]
    {::fx/set-focus [(.getElementById js/document "input-field") 100]}))

;; Replaces the input with its tone-digit Pinyin converted to diacritics.
(rf/reg-event-fx
  ::digits->diacritics
  (fn [{:keys [db] :as cofx} [_ input]]
    {:db       (assoc db :input (p/digits->diacritics input))
     :dispatch [::regain-input-focus]}))

;; Replaces the input with its diacritic Pinyin converted to tone digits.
(rf/reg-event-fx
  ::diacritics->digits
  (fn [{:keys [db] :as cofx} [_ input]]
    {:db       (assoc db :input (p/diacritics->digits input))
     :dispatch [::regain-input-focus]}))
--------------------------------------------------------------------------------
/src/sinostudy/events/core.cljs:
--------------------------------------------------------------------------------
(ns sinostudy.events.core
  "For miscellaneous events that do not have their own more specific namespace."
  (:require [clojure.string :as str]
            [clojure.set :as set]
            ;; Required explicitly: ::save-term calls cljs.pprint/pprint by
            ;; its fully qualified name, which only resolves reliably when
            ;; the namespace is loaded by this one.
            [cljs.pprint]
            [cljs.spec.alpha :as s]
            [re-frame.core :as rf]
            [ajax.core :as ajax]
            [cognitect.transit :as transit]
            [sinostudy.spec.dictionary :as sd]
            [sinostudy.db :as db]
            [sinostudy.pinyin.core :as p]
            [sinostudy.pinyin.eval :as pe]
            [sinostudy.dictionary.core :as d]
            [sinostudy.navigation.pages :as pages]
            [sinostudy.events.scrolling :as scrolling]
            [sinostudy.events.actions :as actions]
            [sinostudy.cofx :as cofx]
            [sinostudy.fx :as fx]))

;; all responses from the Compojure backend are Transit-encoded
(def transit-reader
  (transit/reader :json))

(defn available-actions
  "Evaluate a query string to get a vector of possible actions."
[query]
  (let [query*              (p/with-umlaut query)
        pinyin-block?       (or (pe/pinyin-block? query*)
                                (pe/pinyin-block+digits? query*)
                                (pe/pinyin-block+diacritics? query*))
        ;; A conversion is only offered when the query actually contains the
        ;; marks to convert, i.e. it is not already plain Pinyin.
        diacritics->digits? (and (pe/pinyin+diacritics+punct? query*)
                                 (not (pe/pinyin+punct? query*)))
        digits->diacritics? (and (pe/pinyin+digits+punct? query*)
                                 (not (pe/pinyin+punct? query*)))]
    ;; Returns nil when neither branch applies.
    (cond
      ;; Pure hanzi can only be looked up.
      (pe/hanzi-block? query*)
      [[::look-up query*]]

      ;; Anything starting with a word character is looked up as is, with
      ;; further actions appended when applicable.
      (re-find #"^\w" query*)
      (cond-> [[::look-up query*]]

              ;; Only added when with-umlaut changed the query.
              (and pinyin-block?
                   (not= query query*))
              (conj [::look-up (d/pinyin-key query*)])

              digits->diacritics?
              (conj [::digits->diacritics query*])

              diacritics->digits?
              (conj [::diacritics->digits query*])))))

(defn- cache-search-result-entries
  "Save the individual entries of a dictionary search result in the db.
  Note: this is a separate step from saving the search result itself!
  The entries of the :english and the three Pinyin result types are merged and
  each one is stored under [:pages ::pages/terms <term of the entry>]."
  [db content]
  (let [path      [:pages ::pages/terms]
        entry-ks  #{:english
                    :pinyin
                    :pinyin+diacritics
                    :pinyin+digits}
        entries   (->> (select-keys content entry-ks)
                       (vals)
                       (apply set/union))
        add-entry (fn [db entry]
                    (assoc-in db (conj path (:term entry)) entry))]
    (reduce add-entry db entries)))

(defn mk-input
  "What the input field should display based on a given page.
  For a terms page this is the id of the page unless that id is a hanzi
  block; in every other case the result is nil."
  [[category id :as page]]
  (cond
    (= ::pages/terms category) (when (not (pe/hanzi-block?
id)) id)))

;;;; MISCELLANEOUS

;; Replaces the whole app db with the initial db.
(rf/reg-event-db
  ::initialize-db
  (fn [_ _]
    db/initial-db))

;; Stores the given decomposition under :decomposed.
(rf/reg-event-db
  ::decompose-char
  (fn [db [_ decomposition]]
    (assoc db :decomposed decomposition)))

;; Stores the given script under :script.
(rf/reg-event-db
  ::change-script
  (fn [db [_ script]]
    (assoc db :script script)))

;; Remembers which result type is selected for a given search term.
(rf/reg-event-db
  ::set-result-filter
  (fn [db [_ term type]]
    (assoc-in db [:result-filters term] type)))

;; Blurs whichever element the ::cofx/active-element coeffect supplies.
(rf/reg-event-fx
  ::blur-active-element
  [(rf/inject-cofx ::cofx/active-element)]
  (fn [{:keys [::cofx/active-element] :as cofx} _]
    {::fx/blur active-element}))


;;;; EVALUATION

;; Adds a timestamped evaluation (query + its actions) to :evaluations.
;; Other handlers read the latest evaluation with `first`.
(rf/reg-event-fx
  ::save-evaluation
  [(rf/inject-cofx ::cofx/now)]
  (fn [{:keys [db ::cofx/now] :as cofx} [_ query actions]]
    {:db (update db :evaluations conj {:query     query
                                       :actions   actions
                                       :timestamp now})}))

;; Only evaluates the latest input (no change while still writing).
;; This improves performance when coupled with delayed dispatching
;; also doesn't evaluate the same query twice in a row!
(rf/reg-event-fx
  ::evaluate-input
  (fn [{:keys [db] :as cofx} [_ input]]
    (let [latest-evaluation (first (:evaluations db))
          ;; False when the input field has changed since this event was queued.
          latest-input?     (= input (:input db))
          query             (str/trim input)
          new-query?        (not= query (:query latest-evaluation))
          actions           (available-actions query)]
      (when (and latest-input? new-query?)
        ;; The evaluation is always saved; the first action is additionally
        ;; dispatched right away when it is a look-up.
        {:dispatch-n [[::save-evaluation query actions]
                      (when (and actions
                                 (= ::look-up (-> actions first first)))
                        (first actions))]}))))

;; Dispatched every time the input field changes.
;; For performance reasons, non-blank queries are evaluated with a short lag
;; while blank queries are dispatched immediately for evaluation.
;; Immediate evaluation for blank input will override queued queries
;; this prevents any hint-changing misfires after clearing the input.
;; Otherwise, a queued query could modify the UI shortly after.
(rf/reg-event-fx
  ::on-input-change
  (fn [{:keys [db] :as cofx} [_ input]]
    (let [delay (get-in db [:config :evaluation :delay])
          ;; The new input is stored in the db in both cases.
          fx    {:db (assoc db :input input)}]
      (if (str/blank? input)
        (assoc fx :dispatch [::evaluate-input input])
        (assoc fx :dispatch-later [{:dispatch [::evaluate-input input]
                                    :ms       delay}])))))


;;;; CHANGING LOCATION & LOADING PAGE CONTENT

;;; Force an evaluation for the latest input if it hasn't been evaluated yet.
;;; A single available action is dispatched directly (and the active element
;;; blurred); several actions open the action-chooser instead.
(rf/reg-event-fx
  ::submit
  (fn [{:keys [db] :as cofx} [_ input]]
    (let [latest-eval (first (:evaluations db))
          query       (str/trim input)
          new-query?  (not= query (:query latest-eval))
          ;; Re-use the actions of the latest evaluation when it already
          ;; covers this query.
          actions     (if new-query?
                        (available-actions query)
                        (:actions latest-eval))
          n           (count actions)]
      {:dispatch-n (cond-> []
                           new-query? (conj [::save-evaluation query actions])
                           (= n 1) (concat (conj actions [::blur-active-element]))
                           (> n 1) (conj [::actions/open-action-chooser]))})))

;; Pages are loaded on-demand from either the frontend db or (if N/A) by sending
;; a request to the backend. Currently, only dictionary pages are supported.
(rf/reg-event-fx
  ::load-page
  (fn [{:keys [db] :as cofx} [_ [category _ :as page]]]
    (let [{:keys [unknown-queries pages]} db]
      (when (= category ::pages/terms)
        (if (and (not (contains?
unknown-queries (second page)))
                 (not (get-in pages page)))
          ;; Neither known to be unknown nor cached: ask the backend.
          ;; unknown-queries holds page ids (see ::register-unknown-query and
          ;; ::update-location), so the id -- not the whole [category id]
          ;; page vector -- has to be tested for membership. Testing the
          ;; vector never matched, so unknown terms were requested again on
          ;; every load.
          {:dispatch [::request page]}
          {:dispatch [::update-location page]})))))

;; Adds the page to the :queue of pending requests. The time of enqueueing is
;; attached as metadata, so it does not affect equality or set membership.
(rf/reg-event-fx
  ::enqueue
  [(rf/inject-cofx ::cofx/now)]
  (fn [{:keys [db ::cofx/now] :as cofx} [_ page]]
    {:db (update db :queue conj (with-meta page {:ts now}))}))

;; Removes the page from the :queue of pending requests.
(rf/reg-event-db
  ::dequeue
  (fn [db [_ page]]
    (update db :queue disj page)))

;; If a page doesn't exist in the frontend db, the backend will be contacted
;; through an Ajax request. While the request is underway, the requested page
;; is put in a queue. While requests are enqueued, they will not be retried.
;; Once a request for a page has been fulfilled or failed, the page will be
;; dequeued once again, allowing for new requests to be sent.
(rf/reg-event-fx
  ::request
  (fn [{:keys [db queue] :as cofx} [_ [category id :as page]]]
    (let [uri (str (:query-uri db) "/" (name category) "/" id)]
      (when (not (contains?
(:queue db) page))
        ;; The queue lives in the app db (see ::enqueue / ::dequeue), so it is
        ;; read from there. The `queue` key destructured from the coeffects is
        ;; never supplied, which made this check always pass.
        {:dispatch   [::enqueue page]
         :http-xhrio {:method          :get
                      :timeout         5000
                      :response-format (ajax/text-response-format)
                      :on-success      [::on-request-success]
                      ;; The page is passed along so that a failed request can
                      ;; be dequeued; the response is appended after it.
                      :on-failure      [::on-request-failure page]
                      :uri             uri}}))))

;; Decodes the Transit response, logs it under :queries, removes the page from
;; the queue and hands the content over to ::save-page.
(rf/reg-event-fx
  ::on-request-success
  [(rf/inject-cofx ::cofx/now)]
  (fn [{:keys [db ::cofx/now] :as cofx} [_ result]]
    (let [content (transit/read transit-reader result)
          page    (:page content)]
      {:db         (update db :queries conj {:state     :success
                                             :content   content
                                             :timestamp now})
       :dispatch-n [[::dequeue page]
                    [::save-page content]]})))

;; Logs the failed response under :queries and dequeues the page so that it
;; may be requested again. Accepts both [_ page result] and the older
;; [_ result] form; without a page nothing is dequeued.
(rf/reg-event-fx
  ::on-request-failure
  [(rf/inject-cofx ::cofx/now)]
  (fn [{:keys [db ::cofx/now] :as cofx} [_ & args]]
    (let [result (last args)
          page   (when (> (count args) 1)
                   (first args))]
      (cond-> {:db (update db :queries conj {:state     :failure
                                             :content   result
                                             :timestamp now})}
              page (assoc :dispatch [::dequeue page])))))

;; Successful request to the backend lead to the retrieved page being saved in
;; the frontend db. In cases where a term does not have an associated page,
;; it is registered as unknown to prevent further retrieval attempts.
(rf/reg-event-fx
  ::save-page
  (fn [_ [_ {:keys [page result]}]]
    (let [[category id] page]
      {:dispatch-n [(cond
                      (nil? result) [::register-unknown-query id]
                      (= category ::pages/terms) [::save-term page result])
                    [::update-location page]]})))

;; Remembers the term as one that the backend has no page for.
(rf/reg-event-db
  ::register-unknown-query
  (fn [db [_ term]]
    (update db :unknown-queries conj term)))

;; Store result directly and then store individual entries.
;; TODO: reduce overwrites for hanzi result?
(rf/reg-event-db
  ::save-term
  (fn [db [_ [category id :as page] search-result]]
    ;; A search result that does not conform to the spec is logged to the
    ;; console and otherwise ignored (the db is returned unchanged).
    (if-let [err (s/explain-data ::sd/search-result search-result)]
      (do (js/console.error (with-out-str (cljs.pprint/pprint err)))
          db)
      (-> db
          ;; Save the actual search result or dictionary entry in the db.
          (assoc-in [:pages category id] (-> search-result
                                             (d/reduce-result)
                                             (d/sort-result)))

          ;; Cache incidental, referenced entries for faster page rendering times.
          (cache-search-result-entries search-result)))))

;; Dispatched either directly by ::load-page or indirectly through a successful
;; backend request. This ensures that the address bar is only updated when
;; content actually exists.
;; Navigation only happens when the page id still equals the current input,
;; the id is not registered as unknown and the current pathname is not already
;; equivalent to the page.
(rf/reg-event-fx
  ::update-location
  [(rf/inject-cofx ::cofx/pathname)]
  (fn [{:keys [db ::cofx/pathname] :as cofx} [_ [_ id :as page]]]
    (let [{:keys [input unknown-queries]} db]
      (when (and (= input id)
                 (not (contains? unknown-queries id))
                 (not (pages/equivalent?
pathname page)))
        {::fx/navigate-to (pages/page->pathname page)}))))

;; Records the new page in :history, fills in the input field if it is still
;; empty, saves the scroll state of the page being left and loads the new page.
(rf/reg-event-fx
  ::change-location
  [(rf/inject-cofx ::cofx/now)
   (rf/inject-cofx ::cofx/scroll-state)]
  (fn [{:keys [db ::cofx/now ::cofx/scroll-state] :as cofx} [_ new-page]]
    (let [{:keys [input history]} db
          current-page (first history)]
      {:db         (-> db
                       ;; NOTE(review): the metadata here is the bare `now`
                       ;; value, whereas ::enqueue wraps it as {:ts now} --
                       ;; confirm which form is intended.
                       (update :history conj (with-meta new-page now))
                       ;; An existing input is never overwritten.
                       (assoc :input (or input
                                         (mk-input new-page))))
       :dispatch-n [[::scrolling/save-scroll-state current-page scroll-state]
                    [::load-page (pages/shortened new-page)]]})))

;; Looking up a term means loading its page in the ::pages/terms category.
(rf/reg-event-fx
  ::look-up
  (fn [_ [_ term]]
    {:dispatch [::load-page [::pages/terms term]]}))
--------------------------------------------------------------------------------
/src/sinostudy/events/scrolling.cljs:
--------------------------------------------------------------------------------
(ns sinostudy.events.scrolling
  "For all events relating to preservation of scroll state when navigating
  the browser history."
  (:require [re-frame.core :as rf]
            [sinostudy.fx :as fx]))

;; Stores the scroll state for a page; an empty scroll state is not stored.
(rf/reg-event-db
  ::save-scroll-state
  (fn [db [_ page scroll-state]]
    (if (not (empty? scroll-state))
      (assoc-in db [:scroll-states page] scroll-state)
      db)))

;; Forgets the stored scroll state of a page.
(rf/reg-event-db
  ::reset-scroll-state
  (fn [db [_ page]]
    (update db :scroll-states dissoc page)))

;; Applies the stored scroll state of a page through ::fx/set-scroll-state.
(rf/reg-event-fx
  ::load-scroll-state
  (fn [{:keys [db]} [_ page]]
    {::fx/set-scroll-state (get-in db [:scroll-states page])}))
--------------------------------------------------------------------------------
/src/sinostudy/fx.cljs:
--------------------------------------------------------------------------------
(ns sinostudy.fx
  (:require [re-frame.core :as rf]
            [accountant.core :as accountant]))

;; Dispatched by actions that need to change the page (and browser history).
6 | (rf/reg-fx 7 | ::navigate-to 8 | (fn [path] 9 | (accountant/navigate! path))) 10 | 11 | ;; Dispatched by ::close-action-chooser. 12 | ;; This is definitely a less than optimal solution... 13 | (rf/reg-fx 14 | ::set-focus 15 | (fn [[element delay]] 16 | (js/setTimeout 17 | #(when element (.focus element)) ; no-op without an element, same guard as ::blur 18 | delay))) 19 | 20 | ;; Dispatched by ::close-action-chooser. 21 | ;; This is definitely a less than optimal solution... 22 | (rf/reg-fx 23 | ::blur 24 | (fn [element] 25 | (when element 26 | (.blur element)))) 27 | 28 | ;; Dispatched by ::load-scroll-state. Selectors that match no element are skipped. 29 | (rf/reg-fx 30 | ::set-scroll-state 31 | (fn [scroll-state] 32 | (doseq [[css-selector [x y]] scroll-state] 33 | (when-let [element (aget (js/document.querySelectorAll css-selector) 0)] 34 | (set! (.-scrollLeft element) x) 35 | (set! (.-scrollTop element) y))))) 36 | -------------------------------------------------------------------------------- /src/sinostudy/macros/core.clj: -------------------------------------------------------------------------------- 1 | (ns sinostudy.macros.core 2 | (:refer-clojure :exclude [slurp])) 3 | 4 | (defmacro slurp [file] 5 | (clojure.core/slurp file)) 6 | -------------------------------------------------------------------------------- /src/sinostudy/navigation/handler.clj: -------------------------------------------------------------------------------- 1 | (ns sinostudy.navigation.handler 2 | (:import [java.io ByteArrayOutputStream]) 3 | (:require [clojure.java.io :as io] 4 | [clojure.string :as str] 5 | [clojure.tools.reader :as reader] 6 | [compojure.core :refer :all] 7 | [compojure.route :as route] 8 | [ring.middleware.defaults :refer [wrap-defaults site-defaults]] 9 | [cognitect.transit :as transit] 10 | [org.httpkit.server :as hs] 11 | [mount.core :as mount :refer [defstate]] 12 | [mount-up.core :as mount-up] 13 | [sinostudy.navigation.pages :as pages] 14 | [sinostudy.dictionary.load :as dl] 15 | [sinostudy.dictionary.core :as d]) 16 | (:gen-class)) 17 | 18 | ;; TODO: split into
dev/production 19 | ;; https://github.com/JulianBirch/cljs-ajax/blob/master/docs/server.md#cross-origin-requests 20 | 21 | ;; TODO: use coercions for regex check of input 22 | ;; https://weavejester.github.io/compojure/compojure.coercions.html 23 | 24 | (defstate config 25 | "System config file (EDN format)." 26 | :start (-> "config.edn" io/resource slurp reader/read-string)) 27 | 28 | (defstate dict 29 | "Dictionary used for Chinese/English/pinyin term look-ups." 30 | :start (dl/load-dict)) 31 | 32 | (def index 33 | (slurp (io/resource "public/index.html"))) 34 | 35 | ;; First Access-Control header permits cross-origin requests. 36 | ;; Second prevents Chrome from stripping Content-Type header. 37 | (def ajax-headers 38 | {"Access-Control-Allow-Origin" "*" 39 | "Access-Control-Allow-Headers" "Content-Type" 40 | "Content-Type" "application/transit+json; charset=utf-8"}) 41 | 42 | (defn transit-write 43 | "Encode Clojure data using Transit (adapted from David Nolen's example). Returns the written JSON as a string, decoded as UTF-8 rather than the platform default charset so that it agrees with the charset declared in ajax-headers." 44 | [x] (let [baos (ByteArrayOutputStream.) 45 | w (transit/writer baos :json) 46 | _ (transit/write w x) 47 | ret (.toString baos "UTF-8")] 48 | (.reset baos) 49 | ret)) 50 | 51 | (defn ns-keywords 52 | "Convert a string separated by a delimiter into namespaced keywords." 53 | [re ns s] 54 | (if (string? s) 55 | (->> (str/split s re) 56 | (map (partial keyword (str ns))) 57 | (set)) 58 | s)) 59 | 60 | (defn execute-query 61 | "Execute a query from the ClojureScript app. 62 | The queries all resolve to a type, a query, and optional parameters." 63 | [type query opts] 64 | (cond 65 | (= ::pages/terms type) (d/look-up dict query))) 66 | 67 | (defn transit-result 68 | "Get the Transit-encoded result of a query." 69 | [type query opts] 70 | (transit-write {:page [type query] 71 | :result (execute-query type query opts)})) 72 | 73 | (defroutes app-routes 74 | ;; ANY rather than GET is necessary to allow cross origin requests during dev.
75 | (ANY "/query/:type/:query" [type query & opts] 76 | {:status 200 77 | :headers ajax-headers 78 | :body (transit-result (keyword (str 'sinostudy.navigation.pages) type) 79 | query 80 | opts)}) 81 | 82 | ;; HTML page requests all resolve to the ClojureScript app. 83 | ;; The internal routing of the app creates the correct presentation. 84 | (ANY "*" [] index)) 85 | 86 | ;; Allows web resources in the JAR (such as CSS and JS) to be fetched. 87 | ;; This is especially important in production, i.e. using html-kit. 88 | ;; Otherwise, the paths referencing them in index.html will return nothing. 89 | (defroutes resources-routes 90 | (route/resources "/" {:root "public"})) 91 | 92 | (def all-routes 93 | (routes resources-routes 94 | app-routes)) 95 | 96 | (def app 97 | (wrap-defaults all-routes site-defaults)) 98 | 99 | (defstate server 100 | "Server instance (http-kit)." 101 | :start (hs/run-server #'app {:port (get-in config [:server :port :internal] 8080)}) 102 | :stop (server)) 103 | 104 | (defn -main 105 | [] 106 | (mount-up/on-upndown :info mount-up/log :before) 107 | (mount/start) 108 | (println (str "Listening on port " (get-in config [:server :port :internal] 8080)))) 109 | -------------------------------------------------------------------------------- /src/sinostudy/navigation/pages.cljc: -------------------------------------------------------------------------------- 1 | (ns sinostudy.navigation.pages 2 | "This namespace contains functions related to the page abstraction used in 3 | sino.study, as well as serving as a namespaced keyword prefix for the various 4 | page categories in use, e.g. ::pages/terms." 5 | (:require [clojure.string :as str])) 6 | 7 | (defn shortened 8 | "Helper function to make sure a page is maximum 2 items (category and id). 9 | Additional items do affect how a page is displayed, but still refer to the 10 | same basic data as the 2-item page." 
11 | [page] 12 | (when (and page (> (count page) 1)) 13 | (subvec page 0 2))) 14 | 15 | (defn page->pathname 16 | "Convert a page to a window.location.pathname." 17 | [page] 18 | (str "/" (str/join "/" (map name page)))) 19 | 20 | (defn equivalent? 21 | "Is the window.location.pathname equivalent to the given page?" 22 | [pathname page] 23 | (= pathname (page->pathname page))) 24 | -------------------------------------------------------------------------------- /src/sinostudy/navigation/routes.cljs: -------------------------------------------------------------------------------- 1 | (ns sinostudy.navigation.routes 2 | (:require-macros [secretary.core :refer [defroute]]) 3 | (:import goog.History) 4 | (:require [secretary.core :as secretary] 5 | [re-frame.core :as rf] 6 | [sinostudy.events.core :as events] 7 | [sinostudy.navigation.pages :as pages] 8 | [accountant.core :as accountant])) 9 | 10 | ;; Since scroll restoration differs in implementation between e.g. Firefox 11 | ;; and Chrome -- and neither implementations are good enough -- the safest 12 | ;; choice is to carefully disable scroll restoration (default: "automatic"). 13 | (when (exists? js/window.history.scrollRestoration) 14 | (set! js/window.history.scrollRestoration "manual")) 15 | 16 | (defn app-routes [] 17 | ;; This prefixes routes with a hash for compatibility with older browsers 18 | ;; however, it might not be necessary if I don't need to support IE 9 19 | ;; furthermore, it may impede on some other functionality. 20 | (secretary/set-config! :prefix "#") 21 | 22 | ;; Combining the root route with the other page routes doesn't seem to work. 
23 | (defroute "/" [] 24 | (rf/dispatch [::events/change-location [::pages/static "/"]])) 25 | 26 | (defroute "/:page" [page] 27 | (rf/dispatch [::events/change-location [::pages/static (str "/" page)]])) 28 | 29 | (defroute 30 | (str "/" (name ::pages/terms) "/:term") [term] 31 | (rf/dispatch [::events/change-location [::pages/terms term]])) 32 | 33 | (defroute 34 | (str "/" (name ::pages/terms) "/:term/:attribute") [term attribute] 35 | (rf/dispatch [::events/change-location [::pages/terms term attribute]])) 36 | 37 | (defroute 38 | "*" [] 39 | (rf/dispatch [::events/change-location [::pages/static "/404"]]))) 40 | 41 | ;; Following instructions from: https://github.com/venantius/accountant 42 | (accountant/configure-navigation! 43 | {:nav-handler (fn [path] (secretary/dispatch! path)) 44 | :path-exists? (fn [path] (secretary/locate-route path))}) 45 | -------------------------------------------------------------------------------- /src/sinostudy/pinyin/core.cljc: -------------------------------------------------------------------------------- 1 | (ns sinostudy.pinyin.core 2 | (:require [clojure.string :as str] 3 | [sinostudy.rim.core :as rim] 4 | [sinostudy.pinyin.patterns :as patterns] 5 | [sinostudy.pinyin.data :as data])) 6 | 7 | (defn parse-int 8 | "Parses a string s into an integer." 9 | [s] 10 | #?(:clj (Integer/parseInt s) 11 | :cljs (js/parseInt s))) 12 | 13 | (defn with-umlaut 14 | "Replace the common substitute letter V in s with the proper Pinyin Ü." 15 | [s] 16 | (-> s 17 | (str/replace \v \ü) 18 | (str/replace \V \Ü))) 19 | 20 | (defn with-diacritic 21 | "Get the diacriticised char based on Pinyin tone (0 through 5)." 22 | [char tone] 23 | (nth (data/diacritics char) tone)) 24 | 25 | ;; derived from this guideline: http://www.pinyin.info/rules/where.html 26 | (defn diacritic-index 27 | "Get the index in s where a diacritic should be put according to Pinyin rules; 28 | s is a Pinyin syllable with/without an affixed digit (e.g. wang2 or lao). Returns nil if s is not a string or contains no non-digit characters."
29 | [s] 30 | (let [s* (when (string? s) (re-find #"[^\d]+" (str/lower-case s)))] 31 | (cond 32 | (not (string? s)) nil 33 | (empty? s*) nil 34 | (str/includes? s* "a") (str/index-of s* "a") 35 | (str/includes? s* "e") (str/index-of s* "e") 36 | (str/includes? s* "ou") (str/index-of s* "o") 37 | :else (let [index (str/last-index-of s* "n")] ; an n at index 0 is the initial, not a nasal ending 38 | (if (and index (pos? index)) (- index 1) 39 | (- (count s*) 1)))))) 40 | 41 | (defn handle-m 42 | "Handle the super rare, special case final, m." 43 | [s] 44 | (let [tone (parse-int (str (last s))) 45 | skip (if (= \M (first s)) 6 0)] 46 | (nth data/m-diacritics (+ tone skip)))) 47 | 48 | (defn digit->diacritic 49 | "Convert a Pinyin syllable/final s with an affixed tone digit into one with a 50 | tone diacritic. When converting more than a single syllable at a time, 51 | use digits->diacritics instead!" 52 | [s] 53 | (cond 54 | (or (empty? s) (nil? s)) s 55 | (re-matches #"[mM]\d" s) (handle-m s) 56 | :else (let [tone (parse-int (str (last s))) 57 | s* (subs s 0 (dec (count s))) 58 | char (nth s (diacritic-index s)) 59 | char+diacritic (with-diacritic char tone)] 60 | (str/replace s* char char+diacritic)))) 61 | 62 | ;; used by diacritic-string to find the bounds of the last Pinyin final 63 | (defn- last-final 64 | "Take a string with a single affixed tone digit as input and returns the 65 | longest allowed Pinyin final + the digit. The Pinyin final that is returned 66 | is the one immediately before the digit, i.e. the last final." 67 | [s] 68 | (let [digit (last s) 69 | end (dec (count s)) ; decrementing b/c of affixed digit 70 | length (if (< end 4) end 4) ; most cases will be <4 71 | start (- end length)] 72 | (loop [candidate (subs s start end)] 73 | (cond 74 | (empty? candidate) nil 75 | (contains? data/finals (str/lower-case candidate)) (str candidate digit) 76 | :else (recur (apply str (rest candidate))))))) 77 | 78 | (defn- handle-r 79 | "Handle the common special case final, r."
80 | [s] 81 | (str/replace s #"\d" "")) 82 | 83 | ;; used by digits->diacritics to convert tone digits into diacritics 84 | (defn- diacritic-string 85 | "Take a string with a single affixed tone digit as input and substitute the 86 | digit with a tone diacritic. The diacritic is placed in the Pinyin final 87 | immediately before the tone digit." 88 | [s] 89 | (if (contains? #{"r5" "R5" "r0" "R0"} (str/trim s)) 90 | (handle-r s) 91 | (let [final (last-final s) 92 | final+diacritic (digit->diacritic final) 93 | ;; prefix = preceding neutral tone syllables + the initial 94 | prefix (subs s 0 (- (count s) (count final)))] 95 | (str prefix final+diacritic)))) 96 | 97 | (defn digits->diacritics 98 | "Convert a Pinyin string s with one or several tone digits into a string with 99 | tone diacritics. The digits 0, 1, 2, 3, 4, and 5 can be used as tone markers 100 | behind any Pinyin final in the block. Postfixing 0 or 5 (or nothing) will 101 | result in no diacritic being added, i.e. marking a neutral tone. Non-string input is returned unchanged. 102 | Any occurrence of V is converted into a Ü only when the :v-as-umlaut? option is true (default: false)." 103 | [s & {:keys [v-as-umlaut?] :or {v-as-umlaut? false}}] 104 | (if (not (string? s)) 105 | s 106 | (let [s* (if v-as-umlaut? (with-umlaut s) s) 107 | digit-strings (re-seq #"[^\d]+\d" s*) 108 | diacritic-strings (map diacritic-string digit-strings) 109 | suffix (re-seq #"[^\d]+$" s*)] 110 | (apply str (concat diacritic-strings suffix))))) 111 | 112 | ;; used by the pinyin+diacritics? (allows for evaluation as plain Pinyin) 113 | (defn no-diacritics 114 | "Replace those characters in the input string s that have Pinyin diacritics 115 | with standard characters." 116 | ([s] (no-diacritics s data/diacritic-patterns)) 117 | ([s [[replacement match] & xs]] 118 | (if (nil? match) 119 | s 120 | (recur (str/replace s match replacement) xs)))) 121 | 122 | (defn no-digits 123 | "Remove digits from the input string."
124 | [s] 125 | (str/replace s #"[0-9]" "")) 126 | 127 | (defn- char->tone 128 | "Get the tone (0-4) based on a char." 129 | [char] 130 | (loop [tone 1] 131 | (cond 132 | (or (= nil char) (= 5 tone)) 0 133 | (re-matches (get data/tone-diacritics tone) char) tone 134 | :else (recur (inc tone))))) 135 | 136 | (defn- replace-at 137 | "Like clojure.string/replace, but replaces between index from and to (excl)." 138 | [s from to replacement] 139 | (str (subs s 0 from) replacement (subs s to))) 140 | 141 | (defn- diacritics->digits* 142 | "Replaces in s based on a replacements vector." 143 | [s replacements] 144 | (loop [skip 0 145 | s* s 146 | replacements* replacements] 147 | (if-let [[from syllable tone] (first replacements*)] 148 | (recur (if tone (inc skip) skip) 149 | (replace-at s* 150 | (+ skip from) 151 | (+ skip from (count syllable)) 152 | (str syllable tone)) 153 | (rest replacements*)) 154 | s*))) 155 | 156 | (defn diacritics->digits 157 | "Convert a Pinyin string s with tone diacritics into one with tone digits." 158 | [s] 159 | (let [s* (no-diacritics s) 160 | syllables (rim/re-pos patterns/pinyin-syllable s*) 161 | original #(subs s (first %) (+ (first %) (count (second %)))) 162 | diacritic #(re-find #"[^\w]" %) 163 | tone (comp #(if (= 0 %) nil %) char->tone diacritic original)] 164 | (diacritics->digits* s (map (juxt first second tone) syllables)))) 165 | -------------------------------------------------------------------------------- /src/sinostudy/pinyin/data.cljc: -------------------------------------------------------------------------------- 1 | (ns sinostudy.pinyin.data) 2 | 3 | ;; also includes special case initials w and y (technically not initials) 4 | (def initials 5 | #{"b" "p" "m" "f" "d" "t" "n" "l" 6 | "g" "k" "h" "j" "q" "x" "z" "c" 7 | "s" "zh" "ch" "sh" "r" "w" "y"}) 8 | 9 | ;; includes all possible forms in use (e.g. 
"ue" as shorthand for "üe") 10 | ;; r is a common special case final (technically not a final) 11 | ;; m is a super rare, special case final 12 | (def finals 13 | #{"a" "ai" "an" "ang" "ao" 14 | "e" "ei" "en" "eng" "er" 15 | "i" "ia" "ian" "iang" "iao" "ie" "in" "ing" "iong" "iu" 16 | "m" 17 | "o" "ong" "ou" 18 | "r" 19 | "u" "ua" "uai" "uan" "uang" "ue" "ui" "un" "uo" 20 | "ü" "üe"}) 21 | 22 | ;; the index of a character correspond to the tone present at that index 23 | ;; indexes 0 and 5 both represent neutral tone (= no diacritics) 24 | (def diacritics 25 | {\a "aāáǎàa", \A "AĀÁǍÀA" 26 | \o "oōóǒòo", \O "OŌÓǑÒO" 27 | \e "eēéěèe", \E "EĒÉĚÈE" 28 | \u "uūúǔùu", \U "UŪÚǓÙU" 29 | \i "iīíǐìi", \I "IĪÍǏÌI" 30 | \ü "üǖǘǚǜü", \Ü "ÜǕǗǙǛÜ"}) 31 | 32 | ;; m is a super rare, special case final 33 | ;; the vec is index-aligned like the diacritics above (skip 6 for upper case) 34 | ;; note: the diacriticised versions are multi-char and may ruin formatting! 35 | (def m-diacritics 36 | ["m" "m̄" "ḿ" "m̌" "m̀" "m" 37 | "M" "M̄" "Ḿ" "M̌" "M̀" "M"]) 38 | 39 | ;; only used to search and replace diacritics 40 | ;; also handles special case diacritic char, m 41 | (def diacritic-patterns 42 | {"a" #"[āáǎà]", "A" #"[ĀÁǍÀ]" 43 | "o" #"[ōóǒò]", "O" #"[ŌÓǑÒ]" 44 | "e" #"[ēéěè]", "E" #"[ĒÉĚÈ]" 45 | "u" #"[ūúǔù]", "U" #"[ŪÚǓÙ]" 46 | "i" #"[īíǐì]", "I" #"[ĪÍǏÌ]" 47 | "ü" #"[ǖǘǚǜ]", "Ü" #"[ǕǗǙǛ]" 48 | "m" #"(m̄|ḿ|m̌|m̀)" "M" #"(M̄|Ḿ|M̌|M̀)"}) 49 | 50 | ;; used to match diacritics to tones in diacritics->digits 51 | (def tone-diacritics 52 | {1 #"(ā|ō|ē|ū|ī|ǖ|m̄|Ā|Ō|Ē|Ū|Ī|Ǖ|M̄)" 53 | 2 #"(á|ó|é|ú|í|ǘ|ḿ|Á|Ó|É|Ú|Í|Ǘ|Ḿ)" 54 | 3 #"(ǎ|ǒ|ě|ǔ|ǐ|ǚ|m̌|Ǎ|Ǒ|Ě|Ǔ|Ǐ|Ǚ|M̌)" 55 | 4 #"(à|ò|è|ù|ì|ǜ|m̀|À|Ò|È|Ù|Ì|Ǜ|M̀)"}) 56 | 57 | ;; adapted from http://pinyin.info/rules/initials_finals.html 58 | ;; some non-standard syllables have been added: fiao, lo, m, r, sei, yo 59 | (def syllables 60 | #{"a" "ai" "an" "ang" "ao" 61 | 62 | "ba" "bai" "ban" "bang" "bao" "bei" "ben" "beng" "bi" "bian" "biao" "bie" 63 | "bin" 
"bing" "bo" "bu" 64 | 65 | "ca" "cai" "can" "cang" "cao" "ce" "cen" "ceng" "cha" "chai" "chan" "chang" 66 | "chao" "che" "chen" "cheng" "chi" "chong" "chou" "chu" "chua" "chuai" 67 | "chuan" "chuang" "chui" "chun" "chuo" "ci" "cong" "cou" "cu" "cuan" "cui" 68 | "cun" "cuo" 69 | 70 | "da" "dai" "dan" "dang" "dao" "de" "dei" "den" "deng" "di" "dia" "dian" 71 | "diao" "die" "ding" "diu" "dong" "dou" "du" "duan" "dui" "dun" "duo" 72 | 73 | "e" "ei" "en" "eng" "er" 74 | 75 | "fa" "fan" "fang" "fei" "fen" "feng" "fiao" "fo" "fou" "fu" 76 | 77 | "ga" "gai" "gan" "gang" "gao" "ge" "gei" "gen" "geng" "gong" "gou" "gu" 78 | "gua" "guai" "guan" "guang" "gui" "gun" "guo" 79 | 80 | "ha" "hai" "han" "hang" "hao" "he" "hei" "hen" "heng" "hong" "hou" "hu" 81 | "hua" "huai" "huan" "huang" "hui" "hun" "huo" 82 | 83 | "ji" "jia" "jian" "jiang" "jiao" "jie" "jin" "jing" "jiong" "jiu" "ju" 84 | "juan" "jue" "jun" 85 | 86 | "ka" "kai" "kan" "kang" "kao" "ke" "kei" "ken" "keng" "kong" "kou" "ku" 87 | "kua" "kuai" "kuan" "kuang" "kui" "kun" "kuo" 88 | 89 | "la" "lai" "lan" "lang" "lao" "le" "lei" "leng" "li" "lia" "lian" "liang" 90 | "liao" "lie" "lin" "ling" "liu" "lo" "long" "lou" "lu" "luan" "lun" "luo" 91 | "lü" "lüe" 92 | 93 | "m" "ma" "mai" "man" "mang" "mao" "me" "mei" "men" "meng" "mi" "mian" "miao" 94 | "mie" "min" "ming" "miu" "mo" "mou" "mu" 95 | 96 | "na" "nai" "nan" "nang" "nao" "ne" "nei" "nen" "neng" "ni" "nian" "niang" 97 | "niao" "nie" "nin" "ning" "niu" "nong" "nou" "nu" "nuan" "nun" "nuo" "nü" 98 | "nüe" 99 | 100 | "o" "ou" 101 | 102 | "pa" "pai" "pan" "pang" "pao" "pei" "pen" "peng" "pi" "pian" "piao" "pie" 103 | "pin" "ping" "po" "pou" "pu" 104 | 105 | "qi" "qia" "qian" "qiang" "qiao" "qie" "qin" "qing" "qiong" "qiu" "qu" 106 | "quan" "que" "qun" 107 | 108 | "r" "ran" "rang" "rao" "re" "ren" "reng" "ri" "rong" "rou" "ru" "rua" "ruan" 109 | "rui" "run" "ruo" 110 | 111 | "sa" "sai" "san" "sang" "sao" "se" "sei" "sen" "seng" "sha" "shai" "shan" 112 | "shang" "shao" "she" 
"shei" "shen" "sheng" "shi" "shou" "shu" "shua" "shuai" 113 | "shuan" "shuang" "shui" "shun" "shuo" "si" "song" "sou" "su" "suan" "sui" 114 | "sun" "suo" 115 | 116 | "ta" "tai" "tan" "tang" "tao" "te" "tei" "teng" "ti" "tian" "tiao" "tie" 117 | "ting" "tong" "tou" "tu" "tuan" "tui" "tun" "tuo" 118 | 119 | "wa" "wai" "wan" "wang" "wei" "wen" "weng" "wo" "wu" 120 | 121 | "xi" "xia" "xian" "xiang" "xiao" "xie" "xin" "xing" "xiong" "xiu" "xu" 122 | "xuan" "xun" "xue" 123 | 124 | "ya" "yan" "yang" "yao" "ye" "yi" "yin" "ying" "yo" "yong" "you" "yu" "yuan" 125 | "yun" "yue" 126 | 127 | "za" "zai" "zan" "zang" "zao" "ze" "zei" "zen" "zeng" "zha" "zhai" "zhan" 128 | "zhang" "zhao" "zhe" "zhei" "zhen" "zheng" "zhi" "zhong" "zhou" "zhu" "zhua" 129 | "zhuai" "zhuan" "zhuang" "zhui" "zhun" "zhuo" "zi" "zong" "zou" "zu" "zuan" 130 | "zui" "zun" "zuo"}) 131 | 132 | (def decomposition-symbols 133 | "Unicode range for the so-called 'Ideographic Description Characters'. 134 | They are used for decomposition of Hanzi." 
135 | #"\u2FF0-\u2FFF") 136 | 137 | ;; from http://kourge.net/projects/regexp-unicode-block 138 | (def hanzi-unicode 139 | {"CJK Radicals Supplement" #"\u2E80-\u2EFF" 140 | "Kangxi Radicals" #"\u2F00-\u2FDF" 141 | "CJK Symbols and Punctuation" #"\u3000-\u303F" 142 | "CJK Strokes" #"\u31C0-\u31EF" 143 | "Enclosed CJK Letters and Months" #"\u3200-\u32FF" 144 | "CJK Compatibility" #"\u3300-\u33FF" 145 | "CJK Unified Ideographs Extension A" #"\u3400-\u4DBF" 146 | "Yijing Hexagram Symbols" #"\u4DC0-\u4DFF" 147 | "CJK Unified Ideographs" #"\u4E00-\u9FFF" 148 | "CJK Compatibility Ideographs" #"\uF900-\uFAFF" 149 | "CJK Compatibility Forms" #"\uFE30-\uFE4F"}) 150 | -------------------------------------------------------------------------------- /src/sinostudy/pinyin/eval.cljc: -------------------------------------------------------------------------------- 1 | (ns sinostudy.pinyin.eval 2 | (:require #?(:clj [clojure.spec.alpha :as spec] 3 | :cljs [cljs.spec.alpha :as spec]) 4 | [clojure.string :as str] 5 | [sinostudy.pinyin.core :as p] 6 | [sinostudy.pinyin.patterns :as patterns])) 7 | 8 | (defn pinyin-syllable? 9 | "Is this a single Pinyin syllable (no digits or diacritics allowed)?" 10 | [s] 11 | (re-matches patterns/pinyin-syllable s)) 12 | 13 | (defn pinyin-block? 14 | "Is this a plain block of Pinyin (no digits or diacritics allowed)? 15 | Also checks string in reverse to prevent false negatives, e.g. hanguo." 16 | [s] 17 | (or (re-matches patterns/pinyin-block s) 18 | (re-matches patterns/pinyin-rev-block (str/join (reverse s))))) 19 | 20 | ;; TODO: does this need to be changed similar to pinyin-block? 21 | (defn pinyin+punct? 22 | "Is this a sentence containing Pinyin without any tone digits or diacritics?" 23 | [s] 24 | (re-matches patterns/pinyin+punct s)) 25 | 26 | (defn pinyin-block+digits? 27 | "Is this a block of Pinyin with tone digits?" 28 | [s] 29 | (re-matches patterns/pinyin+digits s)) 30 | 31 | (defn pinyin+digits+punct? 
32 | "Is this a sentence containing Pinyin with tone digits?" 33 | [s] 34 | (re-matches patterns/pinyin+digits+punct s)) 35 | 36 | (defn pinyin-block+diacritics? 37 | "Is this a block of Pinyin with tone diacritics? 38 | Note that this function does not validate the *placement* of diacritics!" 39 | [s] 40 | (pinyin-block? (p/no-diacritics s))) 41 | 42 | (defn pinyin+diacritics+punct? 43 | "Is this a sentence containing Pinyin with tone diacritics? 44 | Note that this function does not validate the *placement* of diacritics!" 45 | [s] 46 | (pinyin+punct? (p/no-diacritics s))) 47 | 48 | (defn hanzi-block? 49 | [s] 50 | (re-matches patterns/hanzi-block s)) 51 | 52 | (spec/def ::pinyin-syllable pinyin-syllable?) 53 | 54 | (spec/def ::pinyin-block pinyin-block?) 55 | 56 | (spec/def ::pinyin-block+digits pinyin-block+digits?) 57 | 58 | (spec/def ::pinyin+digits+punct pinyin+digits+punct?) 59 | 60 | (spec/def ::pinyin-block+diacritics pinyin-block+diacritics?) 61 | 62 | (spec/def ::pinyin+diacritics+punct pinyin+diacritics+punct?) 63 | 64 | (spec/def ::hanzi-block hanzi-block?) 65 | -------------------------------------------------------------------------------- /src/sinostudy/pinyin/patterns.cljc: -------------------------------------------------------------------------------- 1 | (ns sinostudy.pinyin.patterns 2 | (:require [clojure.string :as str] 3 | [sinostudy.pinyin.data :as data])) 4 | 5 | ;; reverse-sorting the list of syllables prevents eager resolution in JS regex 6 | ;; otherwise syllables like "wang" will not match (they eagerly resolve to "wa") 7 | (def rev-syllables 8 | (reverse (sort data/syllables))) 9 | 10 | ;; This crazy concoction is used to validate Pinyin such as "hanguo". 11 | ;; If only checking front to back, it's read as "hang" + "uo", i.e. invalid. 12 | ;; By also validating the block in reverse, we get around this issue. 
13 | (def rev-rev-syllables 14 | (reverse (map #(str/join (reverse %)) (sort data/syllables)))) 15 | 16 | (def syllable 17 | (str "(" (str/join "|" rev-syllables) ")")) 18 | 19 | (def rev-syllable 20 | (str "(" (str/join "|" rev-rev-syllables) ")")) 21 | 22 | (def syllable+digit 23 | (str "((" (str/join "|" rev-syllables) ")[012345]?)")) 24 | 25 | (def block 26 | (let [syllable+ (str syllable "+") 27 | syllable* (str "('?" syllable ")*")] 28 | (str "(" syllable+ syllable* ")"))) 29 | 30 | (def rev-block 31 | (let [syllable+ (str rev-syllable "+") 32 | syllable* (str "('?" rev-syllable ")*")] 33 | (str "(" syllable+ syllable* ")"))) 34 | 35 | (def block+digit 36 | (let [syllable+digit+ (str syllable+digit "+") 37 | syllable+digit* (str "('?" syllable+digit ")*")] 38 | (str "(" syllable+digit+ syllable+digit* ")"))) 39 | 40 | ;; note: technically matches non-Latin, e.g. also matches hanzi 41 | (def punct 42 | "[^\\w]+") 43 | 44 | (def pinyin-syllable 45 | (re-pattern (str "(?i)" syllable))) 46 | 47 | (def pinyin-block 48 | (re-pattern (str "(?i)" block))) 49 | 50 | (def pinyin-rev-block 51 | (re-pattern (str "(?i)" rev-block))) 52 | 53 | (def pinyin+punct 54 | (re-pattern (str "(?i)" block "(" block "|" punct ")*"))) 55 | 56 | (def pinyin+digits 57 | (re-pattern (str "(?i)" block+digit))) 58 | 59 | (def pinyin+digits+punct 60 | (re-pattern (str "(?i)" block+digit "(" block+digit "|" punct ")*"))) 61 | 62 | (def hanzi-block 63 | (re-pattern (str "[" (str/join (map str (vals data/hanzi-unicode))) "]+"))) 64 | -------------------------------------------------------------------------------- /src/sinostudy/rim/core.cljc: -------------------------------------------------------------------------------- 1 | (ns sinostudy.rim.core 2 | (:require [clojure.string :as str])) 3 | 4 | ;;;; TEXT-RELATED FUNCTIONS 5 | 6 | ;; based on code examples from StackOverflow: 7 | ;; 
https://stackoverflow.com/questions/3262195/compact-clojure-code-for-regular-expression-matches-and-their-position-in-string 8 | ;; https://stackoverflow.com/questions/18735665/how-can-i-get-the-positions-of-regex-matches-in-clojurescript 9 | (defn re-pos 10 | "Like re-seq, but returns a sorted map of indexes to matches (ascending by index), not a seq of matches. A sorted map is used because callers walk the matches in positional order and a plain map literal stops preserving insertion order once it grows beyond a handful of entries." 11 | [re s] 12 | #?(:clj (loop [out (sorted-map) 13 | m (re-matcher re s)] 14 | (if (.find m) 15 | (recur (assoc out (.start m) (.group m)) m) 16 | out)) 17 | :cljs (let [flags (fn [re] 18 | (let [m? (.-multiline re) 19 | i? (.-ignoreCase re)] 20 | (str "g" (when m? "m") (when i? "i")))) 21 | re (js/RegExp. (.-source re) (flags re))] 22 | (loop [out (sorted-map)] 23 | (if-let [m (.exec re s)] 24 | (recur (assoc out (.-index m) (first m))) 25 | out))))) 26 | 27 | (defn- re-handle* 28 | "Helper function for re-handle. Only takes strings." 29 | [s re f] 30 | (let [matches (re-seq re s)] 31 | (if (empty? matches) 32 | s 33 | (let [others (str/split s re) 34 | ;; Dealing with weird behaviour present in Java/JS implementations 35 | ;; causing empty strings as the first split result. 36 | others* (if (= "" (first others)) 37 | (rest others) 38 | others) 39 | results (map f matches) 40 | [c1 c2] (if (str/starts-with? s (first matches)) 41 | [results others*] 42 | [others* results]) 43 | c3 (if (> (count c1) (count c2)) 44 | (subvec (vec c1) (count c2)) 45 | (subvec (vec c2) (count c1)))] 46 | (concat (vec (interleave c1 c2)) c3))))) 47 | 48 | (defn re-handle 49 | "Split s based on re and reinsert the matches of re in s with f applied. 50 | If s is sequential, then will apply f to matches inside any strings in s. 51 | Note: can be chained -- very useful for creating hiccup data out of a string." 52 | [s re f] 53 | (if (sequential? s) 54 | (map #(if (string?
%) (re-handle* % re f) %) s) 55 | (re-handle* s re f))) 56 | -------------------------------------------------------------------------------- /src/sinostudy/spec/dictionary.cljc: -------------------------------------------------------------------------------- 1 | (ns sinostudy.spec.dictionary 2 | "Contains all specs pertaining to dictionary entries and search results." 3 | (:require [clojure.spec.alpha :as s] 4 | [clojure.string :as str])) 5 | 6 | ;; There's no built-in predicate for this. 7 | (s/def ::non-blank-string 8 | (s/and string? 9 | (complement str/blank?))) 10 | 11 | ;; TODO: expand on this 12 | (s/def ::hanzi 13 | ::non-blank-string) 14 | 15 | ;; TODO: expand on this 16 | (s/def ::pinyin+digits 17 | ::non-blank-string) 18 | 19 | (s/def ::term 20 | ::non-blank-string) 21 | 22 | (s/def ::script 23 | #{:simplified 24 | :traditional}) 25 | 26 | (s/def ::scripts 27 | (s/coll-of ::script :kind set? :into #{})) 28 | 29 | (s/def ::definition 30 | ::non-blank-string) 31 | 32 | (s/def ::definitions 33 | (s/coll-of ::definition :kind set? :into #{})) 34 | 35 | (s/def ::uses 36 | (s/map-of ::pinyin+digits ::definitions)) 37 | 38 | (s/def ::variations 39 | (s/map-of ::script (s/coll-of ::hanzi :kind set? :into #{}))) 40 | 41 | (s/def :classifier/traditional 42 | ::hanzi) 43 | 44 | (s/def :classifier/simplified 45 | ::hanzi) 46 | 47 | (s/def :classifier/pinyin 48 | (s/coll-of ::pinyin+digits)) 49 | 50 | (s/def ::classifier 51 | (s/keys :req-un [:classifier/traditional 52 | :classifier/simplified 53 | :classifier/pinyin])) 54 | 55 | (s/def ::classifiers 56 | (s/coll-of ::classifier :kind set? :into #{})) 57 | 58 | ;; TODO: expand on this 59 | (s/def ::decomposition 60 | string?) 
61 | 62 | (s/def ::frequency 63 | (s/double-in :min 0 64 | :max 1)) 65 | 66 | (s/def ::radical 67 | ::hanzi) 68 | 69 | (s/def ::type 70 | #{"ideographic" 71 | "pictographic" 72 | "pictophonetic"}) 73 | 74 | (s/def ::phonetic 75 | ::hanzi) 76 | 77 | (s/def ::semantic 78 | ::hanzi) 79 | 80 | (s/def ::hint 81 | string?) 82 | 83 | ;; See: https://www.skishore.me/makemeahanzi/ 84 | (s/def ::etymology 85 | (s/keys :req-un [::type] 86 | :opt-un [::phonetic 87 | ::semantic 88 | ::hint])) 89 | 90 | (s/def ::entry 91 | (s/keys :req-un [::term 92 | ::scripts 93 | ::uses] 94 | :opt-un [::radical 95 | ::frequency 96 | ::variations 97 | ::classifiers 98 | ::etymology])) 99 | 100 | (s/def :search-result/hanzi 101 | ::entry) 102 | 103 | (s/def :search-result/pinyin 104 | (s/coll-of ::entry)) 105 | 106 | (s/def :search-result/pinyin+digits 107 | (s/coll-of ::entry)) 108 | 109 | (s/def :search-result/pinyin+diacritics 110 | (s/coll-of ::entry)) 111 | 112 | (s/def :search-result/english 113 | (s/coll-of ::entry)) 114 | 115 | (s/def ::search-result 116 | (s/keys :req-un [::term] 117 | :opt-un [:search-result/hanzi 118 | :search-result/pinyin 119 | :search-result/pinyin+digits 120 | :search-result/pinyin+diacritics 121 | :search-result/english])) 122 | -------------------------------------------------------------------------------- /src/sinostudy/spec/pages.cljc: -------------------------------------------------------------------------------- 1 | (ns sinostudy.spec.pages 2 | (:require [clojure.spec.alpha :as s])) 3 | 4 | (s/def ::category 5 | #{:term :static}) 6 | 7 | (s/def ::page 8 | (s/tuple ::category string?)) 9 | -------------------------------------------------------------------------------- /src/sinostudy/subs.cljs: -------------------------------------------------------------------------------- 1 | (ns sinostudy.subs 2 | (:require [re-frame.core :as rf] 3 | [sinostudy.navigation.pages :as pages])) 4 | 5 | (rf/reg-sub 6 | ::input 7 | (fn [db] 8 | (:input db))) 9 | 10 | 
;;;; LAYER 2 SUBSCRIPTIONS: plain extractions of values stored in the app db.

(rf/reg-sub
  ::pages
  (fn [db]
    (:pages db)))

(rf/reg-sub
  ::unknown-queries
  (fn [db]
    (:unknown-queries db)))

(rf/reg-sub
  ::history
  (fn [db]
    (:history db)))

(rf/reg-sub
  ::queries
  (fn [db]
    (:queries db)))

(rf/reg-sub
  ::script
  (fn [db]
    (:script db)))

(rf/reg-sub
  ::mode
  (fn [db]
    (:mode db)))

(rf/reg-sub
  ::actions
  (fn [db]
    (:actions db)))

(rf/reg-sub
  ::checked-action
  (fn [db]
    (:checked-action db)))

;; The current evaluation is the first item of the :evaluations in the db.
(rf/reg-sub
  ::current-evaluation
  (fn [db]
    (first (:evaluations db))))

;;;; LAYER 3 SUBSCRIPTIONS: values derived from other subscriptions.

(rf/reg-sub
  ::current-query
  (fn [_]
    (rf/subscribe [::current-evaluation]))
  (fn [evaluation]
    (:query evaluation)))

;; The current page is the first item of the history.
(rf/reg-sub
  ::current-page
  (fn [_]
    (rf/subscribe [::history]))
  (fn [history]
    (first history)))

;; A page is a sequential collection: the category comes first, the id second
;; and an (optional) attribute third.
(rf/reg-sub
  ::current-category
  (fn [_]
    (rf/subscribe [::current-page]))
  (fn [page]
    (first page)))

(rf/reg-sub
  ::current-id
  (fn [_]
    (rf/subscribe [::current-page]))
  (fn [page]
    (second page)))

(rf/reg-sub
  ::current-attribute
  (fn [_]
    (rf/subscribe [::current-page]))
  (fn [page]
    (get page 2)))

;; The content of the current page, looked up in the stored pages using the
;; shortened version of the page as the path.
(rf/reg-sub
  ::content
  (fn [_]
    [(rf/subscribe [::pages])
     (rf/subscribe [::current-page])])
  (fn [[pages page]]
    (when page
      (get-in pages (pages/shortened page)))))

;; The result filters are stored in a map with pages as keys.
(rf/reg-sub
  ::result-filters
  (fn [db]
    (:result-filters db)))

;; The sorted result types of the current search result, i.e. every key of the
;; content except :term. Content containing :uses is a dictionary entry rather
;; than a search result, so it has no result types (nil).
(rf/reg-sub
  ::current-result-types
  (fn [_]
    [(rf/subscribe [::current-category])
     (rf/subscribe [::content])])
  (fn [[category content]]
    (when (and (= category ::pages/terms)
               (not (contains? content :uses)))
      (->> (keys content)
           (filter (partial not= :term))
           (sort)))))

;; The result filter stored for the current search term -OR-, when nothing is
;; stored, the result type with the most results.
(rf/reg-sub
  ::current-result-filter
  (fn [_]
    [(rf/subscribe [::current-category])
     (rf/subscribe [::content])
     (rf/subscribe [::result-filters])
     (rf/subscribe [::current-result-types])])
  (fn [[category
        {search-term :term
         :as         content}
        result-filter
        current-result-types]]
    (when (= category ::pages/terms)
      (or (get result-filter search-term)
          ;; max-key requires at least one item to compare, so the absence of
          ;; result types (e.g. for dictionary entries) must be guarded against.
          (when (seq current-result-types)
            (apply max-key (comp count (partial get content))
                   current-result-types))))))

;; the currently active link in the nav section
;; used to determine which top-level link to disable
(rf/reg-sub
  ::current-nav
  (fn [_]
    (rf/subscribe [::current-page]))
  (fn [[page-type key]]
    (when (= page-type ::pages/static) key)))

--------------------------------------------------------------------------------
/src/sinostudy/views/common.cljs:
--------------------------------------------------------------------------------
(ns sinostudy.views.common
  (:require [sinostudy.navigation.pages :as pages]
            [sinostudy.events.scrolling :as scrolling]
            [sinostudy.pinyin.eval :as pe]
            [sinostudy.rim.core :as rim]
            [sinostudy.dictionary.embed :as embed]
            [sinostudy.pinyin.core :as p]
            [clojure.string :as str]
            [re-frame.core :as rf]))

;; The on-click handler that dispatches an event to reset the scroll state
;; is a necessity, given that it is currently not possible to distinguish
;; between back/forward button navigation events and clicking links.
;; Obviously, clicking a link should never result in a restored scroll state.
;; Similarly, some queries (e.g. look-ups) also manually reset the scroll state.
(defn link-term
  "Add links to dictionary look-ups for each term in text.
  If text is a string, then each character is linked.
  If text is a collection (e.g. hiccup), then each collection item is linked.

  Returns a lazy seq of [:a ...] hiccup vectors. The position of the term is
  made part of the :key, so that repeated terms still get unique keys."
  [text]
  (map-indexed (fn [id term]
                 [:a
                  {:title    (str "Look up " term)
                   :on-click #(rf/dispatch [::scrolling/reset-scroll-state
                                            [::pages/terms term]])
                   :href     (str "/" (name ::pages/terms) "/" term)
                   :key      (str term "-" id)}
                  term])
               text))

(defn hanzi-link
  "Link the text, but only link if the text is Hanzi."
  [text]
  (if (pe/hanzi-block? text)
    (link-term text)
    text))

(defn refr->m
  "Transform the embedded reference string into a Clojure map.

  The string is split into a hanzi part and a bracketed pinyin part.
  The hanzi part may contain two versions separated by |, in which case the
  first is treated as traditional and the second as simplified; otherwise,
  the same hanzi is used for both. The pinyin part is split on spaces and
  each syllable is converted using p/digits->diacritics.

  Returns a map with the keys :traditional, :simplified and :pinyin."
  [refr]
  (let [[hanzi-str pinyin-str] (str/split refr #"\[|\]")
        hanzi       (str/split hanzi-str #"\|")
        pinyin      (->> (str/split pinyin-str #" ")
                         (map p/digits->diacritics))
        traditional (first hanzi)
        simplified  (or (second hanzi) traditional)]
    {:traditional traditional
     :simplified  simplified
     :pinyin      pinyin}))

(defn zh
  "Get the proper Chinese lang attribute based on the script."
  [script]
  (case script
    :traditional "zh-Hant"
    :simplified "zh-Hans"
    "zh"))

(defn- handle-ref
  "Handle s with f in the given script if s is a reference.

  Depending on which pattern s matches, the result is a hiccup span marked up
  with a lang attribute and/or the pinyin class, with f applied to the
  individual parts. If s matches none of the patterns, the result is (f s)."
  [script f s]
  (let [lang       (zh script)
        ;; Picks the version of the hanzi matching the script: the second item
        ;; is only used for simplified and only if there is more than one item.
        use-script (fn [coll]
                     (get coll (cond
                                 (= (count coll) 1) 0
                                 (= script :simplified) 1
                                 :else 0)))]
    (cond
      (re-matches embed/refr s) (let [m      (refr->m s)
                                      pinyin (->> (:pinyin m)
                                                  (map f)
                                                  (interpose " "))
                                      hanzi  (script m)]
                                  [:span {:key hanzi}
                                   [:span {:lang lang}
                                    (f hanzi)]
                                   [:span.pinyin
                                    pinyin]])

      (re-matches embed/hanzi s) (let [hanzi (-> s
                                                 (str/split #"\|")
                                                 (use-script))]
                                   [:span {:lang lang :key hanzi}
                                    (f hanzi)])

      (pe/hanzi-block? s) [:span {:lang lang
                                  :key  s}
                           (f s)]

      ;; The first and last characters of s (the brackets) are stripped.
      (re-matches embed/pinyin s) (let [pinyin (-> s
                                                   (subs 1 (dec (count s)))
                                                   (str/split #" "))]
                                    [:span.pinyin {:key s}
                                     (interpose " " (map f pinyin))])

      ;; TODO: don't link numbers? i.e. 118 in "Kangxi radical 118"
      :else (f s))))

(defn handle-refs
  "Add hyperlink and style any references to dictionary entries in s.
  Script is the preferred script, i.e. traditional or simplified.
  The function f is applied to the individual parts of the matches in s."
  [script f s]
  ;; The regex consists of three alternatives separated by |:
  ;;   1. the first alternative matches the full embedded refs;
  ;;   2. the second alternative matches embedded pinyin;
  ;;   3. the third alternative matches all remaining words in English/Chinese.
  (let [token-re    #"[^\s]+\[[^\]]+\]|\[[^\]]+\]|[^,.;'\"`´+?&()#%\s]+"
        handle-ref* (partial handle-ref script f)]
    (rim/re-handle s token-re handle-ref*)))

--------------------------------------------------------------------------------
/src/sinostudy/views/core.cljs:
--------------------------------------------------------------------------------
(ns sinostudy.views.core
  (:require [re-frame.core :as rf]
            [reagent.core :as reagent]
            [clojure.string :as str]
            [cljs.reader :as reader]
            [sinostudy.db :as db]
            [sinostudy.subs :as subs]
            [sinostudy.events.core :as events]
            [sinostudy.events.scrolling :as scrolling]
            [sinostudy.events.actions :as actions]
            [sinostudy.views.dictionary :as vd]
            [sinostudy.navigation.pages :as pages])
  (:require-macros [sinostudy.macros.core :as macros]))

;;;; HELPER FUNCTIONS

(defn navlink
  "Make a navigation link to the static page `to` displaying `text`.
  If `from` (the current location) equals `to`, the link is rendered without
  an href and marked with the current-page class instead."
  [from to text]
  (let [key (str from "->" to)]
    (if (= from to)
      [:a.current-page
       {:key key}
       text]
      [:a
       {:on-click #(rf/dispatch [::scrolling/reset-scroll-state
                                 [::pages/static to]])
        :href     to
        :key      key}
       text])))

(defn navify
  "Make navlinks out of links, a collection of [to text] pairs."
  [from links]
  (map (fn [[to text]] (navlink from to text)) links))


;;;; VIEWS

(defn smart-input
  "The input field (part of the header form)."
  []
  (let [input           @(rf/subscribe [::subs/input])
        actions         @(rf/subscribe [::subs/actions])
        unknown-queries @(rf/subscribe [::subs/unknown-queries])
        ;; The input is disabled whenever there are actions to choose from.
        disabled?       (not (nil? actions))
        unknown-query?  (when input
                          (contains? unknown-queries (str/trim input)))]
    [:<>
     [:div#header-input
      [:input#input-field
       {:type            "text"
        :class           (when unknown-query? "unknown")
        :placeholder     "look up..."
        :auto-capitalize "off"
        :auto-correct    "off"
        :auto-complete   "off"
        :spell-check     false
        :disabled        disabled?
        :value           input
        :on-change       (fn [e]
                           (when (nil? actions)
                             (rf/dispatch [::events/on-input-change
                                           (-> e .-target .-value)])))}]

      ;; The button is not actually displayed!
      ;; It's kept around to prevent "Enter" submitting the input to an unknown href.
      ;; If the button isn't there, pressing enter to select an action in the
      ;; action-chooser can misfire a submit event. The on-click event in the submit
      ;; button captures these submit events and sends them straight to /dev/null.
      [:button
       {:type     "submit"
        :on-click (fn [e]
                    (.preventDefault e)
                    (rf/dispatch [::events/submit input]))}
       "go"]]]))

(defn filters
  "Filter for what type of dictionary search result should be shown.
  The filters are hidden unless there is more than one result type."
  []
  (let [{search-term :term} @(rf/subscribe [::subs/content])
        current-filter @(rf/subscribe [::subs/current-result-filter])
        result-types   @(rf/subscribe [::subs/current-result-types])
        hidden?        (not (and result-types
                                 (> (count result-types) 1)))]
    [:div#filters
     {:class (when hidden? "hidden")}
     (interpose " · "
                (for [result-type result-types]
                  (let [result-type-str (str/capitalize (name result-type))]
                    [:span {:key result-type}
                     [:input {:type      "radio"
                              :name      "result-filter"
                              :value     result-type
                              :id        result-type
                              :checked   (= current-filter result-type)
                              :on-change (fn [_]
                                           (rf/dispatch [::events/set-result-filter
                                                         search-term
                                                         result-type]))}]
                     [:label {:for   result-type
                              :title (str "View " result-type-str " results")}
                      result-type-str]])))]))

(defn header
  "The header contains the logo and the main input form."
  []
  (let [page  @(rf/subscribe [::subs/current-page])
        input @(rf/subscribe [::subs/input])]
    [:header
     [:div#aligner
      [:form {:auto-complete "off"}
       [smart-input]
       ;; The title is only shown when the input differs from the page id.
       (when-let [title (and (not= input (second page))
                             (events/mk-input page))]
         [:p#title "↓ " [:em title] " ↓"])
       [filters]]]]))

(defn main
  "The content pane of the site."
  []
  (reagent/create-class
    {:display-name
     "main"

     :reagent-render
     (fn []
       (let [category @(rf/subscribe [::subs/current-category])
             content  @(rf/subscribe [::subs/content])]
         (cond
           (= ::pages/static category) (or content (db/static-pages "/404"))
           (= ::pages/terms category) [vd/dictionary-page])))

     ;; Ensures that scroll state is restored when pushing back/forward button.
     ;; Sadly, this behaviour is global for all updates, so links/buttons/etc.
     ;; must manually dispatch ::scrolling/reset-scroll-state to avoid this!
     :component-did-update
     (fn [_ _]
       (let [page @(rf/subscribe [::subs/current-page])]
         (rf/dispatch [::scrolling/load-scroll-state page])))}))

(defn script-changer
  "The button used to toggle traditional/simplified Chinese script."
  []
  (let [script     @(rf/subscribe [::subs/script])
        text       (if (= :simplified script)
                     "Simpl."
                     "Trad.")
        alt-script (if (= :simplified script)
                     :traditional
                     :simplified)
        title      (str "Click to use " (if (= :simplified alt-script)
                                          "simplified characters"
                                          "traditional characters"))]
    [:a#script-changer
     {:key      alt-script
      :title    title
      :on-click #(rf/dispatch [::events/change-script alt-script])}
     text]))

(defn footer
  "The footer (contains navigation)."
  []
  (let [from  @(rf/subscribe [::subs/current-nav])
        links [["/" "Home"] ["/about" "About"] ["/settings" "Settings"]]]
    [:footer
     [:nav (interpose " · "
                      (conj (vec (navify from links))
                            [script-changer {:key "script-changer"}]))]]))

(defn- action-text
  "The text to display for an action, i.e. a vector of [action query]."
  [[action query]]
  (case action
    ::events/look-up (str "Look up " query)
    ::actions/digits->diacritics "Convert to diacritics"
    ::actions/diacritics->digits "Convert to digits"
    ::actions/close-action-chooser "Cancel"))

(defn- action-choice
  "A single choice (radio button + label) in the action-chooser.
  The choice is checked if action equals the checked action."
  [checked action]
  (let [choose-action (fn [e]
                        (.preventDefault e)
                        (rf/dispatch [::actions/choose-action action]))]
    [:li {:key action}
     [:input {:type      :radio
              :name      "action"
              :value     action
              :checked   (= action checked)
              :id        action
              :on-change choose-action}]
     [:label {:for      action
              :on-click choose-action}
      (action-text action)]]))

(defn action-chooser
  "The pop-in dialog that is used to select from different possible options."
  []
  (let [actions @(rf/subscribe [::subs/actions])
        checked @(rf/subscribe [::subs/checked-action])]
    (when actions
      [:fieldset#actions
       [:legend "Select an action"]
       [:ol
        ;; The checked action is stored as an index into the actions.
        (map (partial action-choice (nth actions checked)) actions)]])))

;;; Project version based on git tag
;;; See: https://github.com/arrdem/lein-git-version
(defn version-digest
  "Current version with link to project on Github."
  [attr]
  (let [version (reader/read-string (macros/slurp "resources/version.edn"))]
    [:address attr
     [:a {:href "https://github.com/simongray/sino.study"}
      (:tag version)]]))

(defn app
  "The root view of the app. The version digest is hidden outside of home."
  []
  (let [not-home? (not= "/" @(rf/subscribe [::subs/current-nav]))]
    [:<>
     [action-chooser]
     [header]
     [main]
     [footer]
     [version-digest (when not-home? {:class "hidden"})]]))

--------------------------------------------------------------------------------
/src/sinostudy/views/dictionary.cljs:
--------------------------------------------------------------------------------
(ns sinostudy.views.dictionary
  (:require [clojure.string :as str]
            [re-frame.core :as rf]
            [sinostudy.dictionary.core :as d]
            [sinostudy.pinyin.core :as p]
            [sinostudy.views.common :as vc]
            [sinostudy.subs :as subs]
            [sinostudy.navigation.pages :as pages]))

(defn entry-title
  "The title of the term with links to characters -OR- decomposition
  into components if the term is a character."
  []
  (let [script         @(rf/subscribe [::subs/script])
        {term          :term
         decomposition :decomposition} @(rf/subscribe [::subs/content])
        attribute      @(rf/subscribe [::subs/current-attribute])
        zh             (vc/zh script)
        ;; A decomposition of "?" is treated as no decomposition at all.
        decomposition* (when (not= decomposition "?") decomposition)]
    (cond
      (> (count term) 1)
      [:h1 {:lang zh} (vc/link-term term)]

      (= attribute "decomposition")
      [:h1 {:lang  zh
            :title "Character decomposition"}
       (map vc/hanzi-link decomposition*)]

      decomposition*
      [:h1
       {:lang  zh
        :title "Click to decompose"}
       [:a
        {:href (str "/" (name ::pages/terms) "/" term "/decomposition")}
        term]]

      :else
      [:h1
       {:lang  zh
        :title term}
       term])))

;; In certain cases, entries may include these "fake" definitions.
;; They're removed on the frontend since the variant may well be valid in
;; e.g. traditional Chinese, but not in simplified Chinese (see: 喂).
(defn no-fake-variants
  "Removes definitions of the pattern 'variant of _' if the term is identical.
  Only applies to single-character terms; otherwise, the definitions are
  returned as they are."
  [script term definitions]
  (if (= (count term) 1)
    (let [variant-re (re-pattern (if (= script :traditional)
                                   (str "variant of " term)
                                   (str "variant of " term
                                        "\\[|variant of .\\|" term)))]
      (remove (partial re-find variant-re) definitions))
    definitions))

(defn usage-list
  "List of definitions for each Pinyin variation of an entry."
  []
  (let [script @(rf/subscribe [::subs/script])
        {term :term
         uses :uses} @(rf/subscribe [::subs/content])]
    [:section#usages
     [:dl
      (for [[pinyin definitions] uses]
        (let [pinyin* (->> (str/split pinyin #" ")
                           (map p/digits->diacritics)
                           (map vector)
                           (map vc/link-term)
                           (interpose " "))]
          ;; The raw pinyin string is used as the key (not the hiccup made
          ;; from it), since React keys should be simple, stable values.
          [:<> {:key pinyin}
           [:dt.pinyin pinyin*]
           [:dd
            [:ol
             (for [definition (no-fake-variants script term (sort definitions))]
               [:li {:key definition}
                (let [link (comp vc/link-term vector)]
                  (vc/handle-refs script link definition))])]]]))]]))

(defn details-table
  "Additional information about the dictionary entry."
  []
  (let [script       @(rf/subscribe [::subs/script])
        zh           (vc/zh script)
        {term        :term
         radical     :radical
         frequency   :frequency
         variations  :variations
         classifiers :classifiers
         etymology   :etymology} @(rf/subscribe [::subs/content])
        label        (d/frequency-label frequency)
        ;; The script of the variations (if any), traditional taking priority.
        entry-script (cond
                       (contains? variations :traditional) :traditional
                       (contains? variations :simplified) :simplified)
        entry-zh     (vc/zh entry-script)]
    [:section.details
     [:table
      [:tbody
       [:tr {:key   :frequency
             :title "Word frequency"}
        [:td "Freq"]
        [:td (cond
               (= label :high) "frequent"
               (= label :medium) "average"
               (= label :low) "infrequent")]]
       (when entry-script
         [:tr {:key   :variations
               :title (if (= :traditional entry-script)
                        "In Traditional Chinese"
                        "In Simplified Chinese")}
          (if (= entry-script :traditional)
            [:td "Trad"]
            [:td "Simp"])
          [:td {:lang entry-zh}
           ;; The raw variation is used as the key (not the hiccup made
           ;; from it), since React keys should be simple, stable values.
           (interpose ", " (for [variation (entry-script variations)]
                             [:span {:key variation}
                              (vc/link-term (vector variation))]))]])
       (when classifiers
         [:tr {:key   :classifiers
               :title "Common classifiers"}
          [:td "Cl"]
          [:td
           (interpose ", "
                      (for [classifier (sort-by :pinyin classifiers)]
                        [:span
                         {:lang zh
                          :key  (script classifier)}
                         (vc/link-term (vector (script classifier)))]))]])
       (when radical
         [:tr {:key   :radical
               :title "Radical"}
          [:td "Rad"]
          (if (= term radical)
            [:td "The character is a radical"]
            [:td {:lang zh} (vc/link-term (vector radical))])])
       (when etymology
         (let [{type     :type
                hint     :hint
                semantic :semantic
                phonetic :phonetic} etymology]
           ;; The row is only shown if the etymology has the required parts.
           (when-let [etym (cond
                             (and (or (= type "pictographic")
                                      (= type "ideographic")) hint)
                             [:<> (let [link (comp vc/link-term vector)]
                                    (vc/handle-refs script link hint))]

                             (and (= type "pictophonetic") semantic phonetic)
                             [:<>
                              [:span {:lang zh} (vc/link-term semantic)]
                              " (" hint ") + "
                              [:span {:lang zh} (vc/link-term phonetic)]])]
             [:tr {:key   :etymology
                   :title "Etymology"}
              [:td "Hint"]
              [:td etym]])))]]]))

(defn entry
  "Dictionary entry for a specific term."
  []
  [:main
   [:article.entry.full
    [entry-title]
    [:div.content
     [usage-list]
     [details-table]]]])

(defn- result-entry-uses
  "Listed uses of a search result entry.

  Returns a lazy seq with one item per pronunciation in uses. The item is nil
  when the pronunciation has no relevant definitions. If there is a
  search-term, only the definitions containing it are considered relevant;
  otherwise, all of the definitions are. The relevant definitions are
  emphasised and listed before the other definitions."
  [script search-term term uses]
  (for [[pronunciation definitions] uses]
    (let [handle-refs*  (partial vc/handle-refs script identity)
          all-defs      (no-fake-variants script term definitions)
          relevant-defs (if search-term
                          (d/defs-containing-term search-term all-defs)
                          all-defs)
          other-defs    (remove (set relevant-defs) all-defs)]
      (when (seq relevant-defs)
        [:<> {:key pronunciation}
         [:dt.pinyin
          (p/digits->diacritics pronunciation)]
         ;; TODO: resolve relevant and other during save step instead
         (into [:dd.understated] (interpose " / "
                                            (concat (->> relevant-defs
                                                         (sort)
                                                         (map handle-refs*)
                                                         (map (fn [x] [:em x])))
                                                    (->> other-defs
                                                         (sort)
                                                         (map handle-refs*)))))]))))

(defn- search-result-entry
  "Entry in a results-list.
  Returns nil if the entry has no uses to display."
  [script search-term {term :term
                       uses :uses}]
  ;; The uses are a lazy seq (always truthy) which may contain nils, so the
  ;; nils must be removed and the seq realised for when-let to have any effect.
  (when-let [entry-uses (->> (result-entry-uses script search-term term uses)
                             (remove nil?)
                             (seq))]
    [:article {:key term}
     [:a {:href (str "/" (name ::pages/terms) "/" term)}
      [:h1 {:lang (vc/zh script)}
       term]
      [:dl
       entry-uses]]]))

(defn search-results
  "List of search result entries.
  Only shows the entries of the current result filter in the current script."
  []
  (let [script             @(rf/subscribe [::subs/script])
        content            @(rf/subscribe [::subs/content])
        result-filter      @(rf/subscribe [::subs/current-result-filter])
        ;; The search term is only used when filtering by English.
        search-term        (when (= result-filter :english)
                             @(rf/subscribe [::subs/current-id]))
        in-current-script? #(contains? (:scripts %) script)]
    (when-let [entries (get content result-filter)]
      [:main#entries
       (->> entries
            (filter in-current-script?)
            (map (partial search-result-entry script search-term)))])))

(defn unknown-term
  "Slightly more specific than a 404."
  [term]
  [:main
   [:article.full
    [:h1 "Sorry,"]
    [:p "the dictionary currently doesn't contain an entry for " term "."]]])

(defn dictionary-page
  "A dictionary page can be 1 of 3 types: entry, search result, or unknown."
  []
  (let [{uses :uses} @(rf/subscribe [::subs/content])
        unknown-queries @(rf/subscribe [::subs/unknown-queries])
        search-term     @(rf/subscribe [::subs/current-id])]
    (cond
      (contains? unknown-queries search-term) [unknown-term search-term]
      uses [entry]
      :else [search-results])))

--------------------------------------------------------------------------------
/test/sinostudy/pinyin/core_test.clj:
--------------------------------------------------------------------------------
(ns sinostudy.pinyin.core-test
  (:require [clojure.test :refer :all]
            [sinostudy.pinyin.core :refer :all]))

(deftest test-umlaut
  (testing "umlaut"
    (is (= (with-umlaut "VvÜü") "ÜüÜü"))))

;; only tests a single char for now!
(deftest test-diacritic
  (testing "diacritic"
    (testing "added to characters?"
      (are [x y] (= x y)
        \a (with-diacritic \a 0)
        \ā (with-diacritic \a 1)
        \á (with-diacritic \a 2)
        \ǎ (with-diacritic \a 3)
        \à (with-diacritic \a 4)
        \a (with-diacritic \a 5)
        \A (with-diacritic \A 0)
        \Ā (with-diacritic \A 1)
        \Á (with-diacritic \A 2)
        \Ǎ (with-diacritic \A 3)
        \À (with-diacritic \A 4)
        \A (with-diacritic \A 5)))
    (testing "tone out of range?"
      (is (thrown? IndexOutOfBoundsException (with-diacritic \a 6))))
    (testing "string instead of char?"
      (is (nil? (with-diacritic "a" 1))))))

(deftest test-diacritic-index
  (testing "diacritic-index"
    (testing "a-rule"
      (is (= (diacritic-index "ao1") 0))
      (is (= (diacritic-index "lang4") 1))
      (is (= (diacritic-index "quan") 2)))
    (testing "e-rule"
      (is (= (diacritic-index "eng") 0))
      (is (= (diacritic-index "heng1") 1))
      (is (= (diacritic-index "zheng") 2)))
    (testing "ou-rule"
      (is (= (diacritic-index "ou") 0))
      (is (= (diacritic-index "tou2") 1))
      (is (= (diacritic-index "zhou") 2)))
    (testing "general rule"
      (is (= (diacritic-index "e") 0))
      (is (= (diacritic-index "eng") 0))
      (is (= (diacritic-index "long2") 1))
      (is (= (diacritic-index "lan") 1))
      (is (= (diacritic-index "kuo4") 2)))
    (testing "mixed case"
      (is (= (diacritic-index "WANG") 1))
      (is (= (diacritic-index "lI0") 1))
      (is (= (diacritic-index "Qu4") 1)))
    (testing "nil input (throws)"
      (is (thrown? NullPointerException (diacritic-index nil))))
    (testing "undefined cases (returns nil)"
      (is (nil? (diacritic-index "")))
      (is (nil? (diacritic-index "4")))
      (is (nil? (diacritic-index [1 2 3])))
      (is (nil? (diacritic-index {:foo :bar}))))))

(deftest test-digit->diacritic
  (testing "digit->diacritic"
    (testing "converts properly?"
      (is (= (digit->diacritic "long3") "lǒng"))
      (is (= (digit->diacritic "er2") "ér")))
    (testing "exceptions"
      (is (thrown? NumberFormatException (digit->diacritic "long")))
      (is (thrown? ClassCastException (digit->diacritic [1 2 3]))))))

(deftest test-digits->diacritics
  (testing "digits->diacritics"
    (testing "converts properly?"
      (is (= (digits->diacritics "ni3hao3, ni3 shi4 shei2?") "nǐhǎo, nǐ shì shéi?"))
      (is (= (digits->diacritics "long") "long"))
      (is (= (digits->diacritics "") "")))
    (testing "non-strings"
      (is (= (digits->diacritics []) []))
      (is (= (digits->diacritics [1 2 3]) [1 2 3]))
      (is (= (digits->diacritics 0) 0))
      (is (= (digits->diacritics \a) \a)))))

--------------------------------------------------------------------------------