├── .gitignore
├── .idea
├── .name
├── ClojureProjectResolveSettings.xml
├── codeStyles
│ └── codeStyleConfig.xml
├── compiler.xml
├── encodings.xml
├── misc.xml
├── modules.xml
└── vcs.xml
├── Dockerfile
├── README.md
├── deploy.sh
├── dev
└── src
│ ├── figwheel_repl.clj
│ └── user.clj
├── project.clj
├── resources
├── config.edn
└── public
│ ├── android-chrome-192x192.png
│ ├── android-chrome-512x512.png
│ ├── apple-touch-icon.png
│ ├── browserconfig.xml
│ ├── css
│ └── main.css
│ ├── favicon-16x16.png
│ ├── favicon-32x32.png
│ ├── favicon.ico
│ ├── favicon_generator.md
│ ├── html_code.html
│ ├── img
│ ├── favicon.png
│ ├── favicon.svg
│ ├── logo.svg
│ ├── logo_dark.svg
│ ├── logo_dark_min.svg
│ ├── logo_min.svg
│ └── search.svg
│ ├── index.html
│ ├── mstile-150x150.png
│ ├── safari-pinned-tab.svg
│ └── site.webmanifest
├── src
└── sinostudy
│ ├── cofx.cljs
│ ├── config.cljs
│ ├── core.cljs
│ ├── db.cljs
│ ├── dictionary
│ ├── core.cljc
│ ├── data.cljc
│ ├── embed.cljc
│ └── load.clj
│ ├── events
│ ├── actions.cljs
│ ├── core.cljs
│ └── scrolling.cljs
│ ├── fx.cljs
│ ├── macros
│ └── core.clj
│ ├── navigation
│ ├── handler.clj
│ ├── pages.cljc
│ └── routes.cljs
│ ├── pinyin
│ ├── core.cljc
│ ├── data.cljc
│ ├── eval.cljc
│ └── patterns.cljc
│ ├── rim
│ └── core.cljc
│ ├── spec
│ ├── dictionary.cljc
│ └── pages.cljc
│ ├── subs.cljs
│ └── views
│ ├── common.cljs
│ ├── core.cljs
│ └── dictionary.cljs
└── test
└── sinostudy
└── pinyin
└── core_test.clj
/.gitignore:
--------------------------------------------------------------------------------
1 | /*.log
2 | /target
3 | /*-init.clj
4 | /resources/public/js/compiled
5 | out
6 | *.iml
7 | resources/tatoeba/links.csv
8 | resources/tatoeba/sentences_detailed.csv
9 | resources/tatoeba/tags.csv
10 | resources/tatoeba/users_sentences.csv
11 | resources/makemeahanzi/
12 | resources/Unihan
13 | resources/frequency/
14 |
15 | # Created by https://www.gitignore.io/api/macos,clojure,intellij,leiningen
16 |
17 | ### Clojure ###
18 | pom.xml
19 | pom.xml.asc
20 | *.jar
21 | *.class
22 | /lib/
23 | /classes/
24 | /target/
25 | /checkouts/
26 | .lein-deps-sum
27 | .lein-repl-history
28 | .lein-plugins/
29 | .lein-failures
30 | .nrepl-port
31 |
32 | ### Intellij ###
33 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
34 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
35 |
36 | # User-specific stuff:
37 | .idea/**/workspace.xml
38 | .idea/**/tasks.xml
39 | .idea/dictionaries
40 |
41 | # Sensitive or high-churn files:
42 | .idea/**/dataSources/
43 | .idea/**/dataSources.ids
44 | .idea/**/dataSources.xml
45 | .idea/**/dataSources.local.xml
46 | .idea/**/sqlDataSources.xml
47 | .idea/**/dynamic.xml
48 | .idea/**/uiDesigner.xml
49 |
50 | # Gradle:
51 | .idea/**/gradle.xml
52 | .idea/**/libraries
53 |
54 | # CMake
55 | cmake-build-debug/
56 |
57 | # Mongo Explorer plugin:
58 | .idea/**/mongoSettings.xml
59 |
60 | ## File-based project format:
61 | *.iws
62 |
63 | ## Plugin-specific files:
64 |
65 | # IntelliJ
66 | /out/
67 |
68 | # mpeltonen/sbt-idea plugin
69 | .idea_modules/
70 |
71 | # JIRA plugin
72 | atlassian-ide-plugin.xml
73 |
74 | # Cursive Clojure plugin
75 | .idea/replstate.xml
76 |
77 | # Ruby plugin and RubyMine
78 | /.rakeTasks
79 |
80 | # Crashlytics plugin (for Android Studio and IntelliJ)
81 | com_crashlytics_export_strings.xml
82 | crashlytics.properties
83 | crashlytics-build.properties
84 | fabric.properties
85 |
86 | ### Intellij Patch ###
87 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
88 |
89 | # *.iml
90 | # modules.xml
91 | # .idea/misc.xml
92 | # *.ipr
93 |
94 | # Sonarlint plugin
95 | .idea/sonarlint
96 |
97 | ### Leiningen ###
98 |
99 | ### macOS ###
100 | *.DS_Store
101 | .AppleDouble
102 | .LSOverride
103 |
104 | # Icon must end with two \r
105 | Icon
106 |
107 | # Thumbnails
108 | ._*
109 |
110 | # Files that might appear in the root of a volume
111 | .DocumentRevisions-V100
112 | .fseventsd
113 | .Spotlight-V100
114 | .TemporaryItems
115 | .Trashes
116 | .VolumeIcon.icns
117 | .com.apple.timemachine.donotpresent
118 |
119 | # Directories potentially created on remote AFP share
120 | .AppleDB
121 | .AppleDesktop
122 | Network Trash Folder
123 | Temporary Items
124 | .apdisk
125 |
126 | # End of https://www.gitignore.io/api/macos,clojure,intellij,leiningen
127 |
128 | .idea/codeStyleSettings.xml
129 | cedict_ts.u8
130 | resources/version.edn
131 | .rebel_readline_history
132 |
--------------------------------------------------------------------------------
/.idea/.name:
--------------------------------------------------------------------------------
1 | sino.study
--------------------------------------------------------------------------------
/.idea/ClojureProjectResolveSettings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | IDE
5 |
6 |
--------------------------------------------------------------------------------
/.idea/codeStyles/codeStyleConfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM openjdk:10
2 | # Build-time inputs: path of the uberjar in the build context and its file name.
3 | ARG JARPATH
4 | ARG JARFILE
5 | # Persist the jar name into the image so the CMD below can read it at runtime.
6 | ENV JARFILE="$JARFILE"
7 | # LABEL replaces the deprecated MAINTAINER instruction.
8 | LABEL maintainer="Simon Gray"
9 | # COPY is preferred over ADD for plain local files (no archive/URL handling).
10 | COPY "$JARPATH" /usr/src/myapp/
11 | WORKDIR /usr/src/myapp
12 | EXPOSE 8080
13 | # Shell form is required to expand $JARFILE; `exec` makes java replace the
14 | # shell so that it receives stop signals from the container runtime directly.
15 | CMD exec java -XX:+PrintFlagsFinal -jar "$JARFILE"
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | This is the repository for [sino·study](http://sino.study),
4 | a web app designed to assist students of the Chinese language in various ways.
5 | At the moment, it is primarily an advanced dictionary,
6 | but in the future it will also include functionality for grammatical analysis.
7 |
8 | It is a single-page application written in [Clojure](https://clojure.org/)
9 | and [ClojureScript](https://clojurescript.org/).
10 | The frontend uses [Reagent](https://github.com/reagent-project/reagent)
11 | and [re-frame](https://github.com/Day8/re-frame).
12 | Furthermore, it makes use of [secretary](https://github.com/gf3/secretary)
13 | and [Accountant](https://github.com/venantius/accountant) for frontend routing.
14 | The backend is a [Compojure](https://github.com/weavejester/compojure) service
15 | that is served by [http-kit](https://github.com/http-kit/http-kit).
16 | Communication between the backend web service and the frontend app is
17 | facilitated by [Transit](https://github.com/cognitect/transit-format).
18 | The functionality is built around my own wrapper library for Stanford CoreNLP,
19 | [Computerese](https://github.com/simongray/Computerese), as well as numerous
20 | open-source datasets, most notably [CC-CEDICT](https://cc-cedict.org/) and
21 | [makemeahanzi](https://github.com/skishore/makemeahanzi).
22 |
23 |
24 | # Development
25 | * Running the sino.study app requires the sinostudy-data git repository to be
26 | located at ~/Code/sinostudy-data. Make sure that directory exists and pull from:
27 | [sino.study-data](https://github.com/simongray/sino.study-data).
28 | **Note: this applies to both dev and production environments.**
29 |
30 | * The REPL starts out in the `user` ns with various other namespaces required.
31 | The user ns also includes relevant custom functions for development.
32 | Changes to e.g. dictionary data structures and most other backend development
33 | is best tested in the REPL.
34 |
35 |
36 | ## Developing with live re-loading
37 | Typical development involves running a development web service locally,
38 | while accessing the data from the service through a live-reloading frontend app.
39 |
40 | ### Local backend server
41 | Start a lein nREPL for the project, then evaluate the following:
42 |
43 | ````
44 | (start)
45 | ````
46 |
47 | This will load the dictionary and start a production server using http-kit.
48 | Wait a bit, then browse to [http://localhost:8080](http://localhost:8080).
49 |
50 | The default port is `8080`, but it can be configured in `resources/config.edn`.
51 | The system can be stopped again by evaluating:
52 |
53 | ```
54 | (stop)
55 | ```
56 |
57 | Pieces of state can be reloaded by evaluating e.g. ```(restart #'dict)```.
58 |
59 | ### Run live-reloading frontend app
60 | For frontend development, the figwheel experience is currently [integrated with
61 | Cursive using `figwheel-sidecar`](https://github.com/bhauman/lein-figwheel/wiki/Running-figwheel-in-a-Cursive-Clojure-REPL).
62 | An IntelliJ REPL should be configured using the `Use clojure.main in normal JVM
63 | process` option and with `dev/src/figwheel_repl.clj` set as the parameter.
64 |
65 | As an alternative, it is also possible to simply run ```lein figwheel dev```.
66 |
67 | Launch the REPL once configured, wait a bit, then browse to
68 | [http://localhost:3449](http://localhost:3449).
69 |
70 | Figwheel will automatically push CLJS changes to the browser,
71 | while preserving the application state. A hard page reload will reset the state.
72 |
73 | If there are any issues getting the app to show up (e.g. blank page),
74 | then try clearing the browser cache. Note that most functionality will require
75 | the development backend service to be running too.
76 |
77 | ### Running dev app on a mobile phone
78 | If I want to test on a mobile phone I will typically run
79 |
80 | ````
81 | ipconfig getifaddr en0
82 | ````
83 |
84 | to get the local IP address of my Mac and then visit that address on port 3449
85 | (or whatever port is being used).
86 |
87 |
88 | ## Deploying to production
89 | Currently, there are three steps to deploying a production Docker image:
90 |
91 | 1. compiling an uberjar
92 | 2. building the docker image
93 | 3. running a container from the image in production
94 |
95 | ### Compiling an uberjar for rapid deployment
96 | This will create a standalone JAR file including the entire compiled app
97 | (note: target JAR filename subject to change).
98 |
99 | ````
100 | lein uberjar
101 | ````
102 |
103 | The uberjar is a self-contained backend+frontend, although it does expect
104 | the sino.study-data repo to be present at the correct path!
105 | To test that the uberjar was packaged correctly, run:
106 |
107 | ````
108 | java -jar target/sinostudy-standalone.jar
109 | ````
110 |
111 | (the JAR filename is fixed by `:uberjar-name` in `project.clj`, so no version number needs to be substituted)
112 |
113 | Wait a bit, then browse to [http://localhost:8080](http://localhost:8080).
114 |
115 |
116 | ### Building and deploying docker image
117 |
118 | To build an image from the Dockerfile, run:
119 |
120 | ````
121 | docker build -t simongray/sino.study:latest -t simongray/sino.study:${version} --build-arg JARPATH=${jarpath} --build-arg JARFILE=${jarfile} .
122 | ````
123 |
124 | Note: this requires the uberjar built during the previous step as well as the
125 | correct name and path of the jarfile.
126 |
127 | It can then be pushed and pulled from the docker store by running e.g.
128 |
129 | ````
130 | docker push simongray/sino.study
131 | docker pull simongray/sino.study
132 | ````
133 |
134 | The image can be run as a Docker container using:
135 |
136 | ````
137 | # in production
138 | docker run -v /root/Code/sinostudy-data:/root/Code/sinostudy-data -p 80:8080 simongray/sino.study:latest
139 |
140 | # testing locally
141 | docker run -v /Users/simon/Code/sinostudy-data:/root/Code/sinostudy-data -p 80:8080 simongray/sino.study:latest
142 | ````
143 |
144 | (this will tunnel the exposed `8080` port of the docker container
145 | to the production system's port `80`)
146 |
147 | Wait a little while, then visit [http://localhost:80](http://localhost:80)
148 | or [http://sino.study](http://sino.study).
149 |
150 | Use ````docker ps -a```` to list all containers and their assigned names.
151 | Stop and remove containers using other relevant docker commands.
152 |
--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # This script automates the following:
4 | # * compiles an uberjar
5 | # * builds and tags a docker image containing the uberjar
6 | # * pushes the docker image to the docker store
7 | #
8 | # Any failing step aborts the script, so a broken build is never pushed.
9 | set -euo pipefail
10 |
11 | image="simongray/sino.study"
12 | re=":tag \"v([^\"]+)"
13 |
14 | # Needs to run a lein action at least once to make sure version.edn is built.
15 | lein version
16 |
17 | if [[ $(cat resources/version.edn) =~ $re ]]; then
18 |   version=${BASH_REMATCH[1]}
19 |   jarfile="sinostudy-standalone.jar"
20 |   jarpath="target/${jarfile}"
21 |   echo "version: ${version}"
22 |
23 |   echo "removing old build artifacts"
24 |   lein clean
25 |
26 |   echo "building uberjar: ${jarfile}"
27 |   lein uberjar
28 |
29 |   echo "building docker image"
30 |   docker build \
31 |     -t "${image}:latest" \
32 |     -t "${image}:${version}" \
33 |     --build-arg JARPATH="${jarpath}" \
34 |     --build-arg JARFILE="${jarfile}" \
35 |     .
36 |
37 |   # Push each tag explicitly: recent Docker CLIs push only :latest when no
38 |   # tag is given, which would leave the versioned tag unpublished.
39 |   echo "pushing docker image"
40 |   docker push "${image}:latest"
41 |   docker push "${image}:${version}"
42 | else
43 |   # Report on stderr and signal failure to the caller.
44 |   echo "ERROR: could not determine current version" >&2
45 |   exit 1
46 | fi
47 |
--------------------------------------------------------------------------------
/dev/src/figwheel_repl.clj:
--------------------------------------------------------------------------------
1 | ;;; See: https://github.com/bhauman/lein-figwheel/wiki/Running-figwheel-in-a-Cursive-Clojure-REPL
2 | (use 'figwheel-sidecar.repl-api) ;; presumably the source of start-figwheel! and cljs-repl below
3 | (start-figwheel!) ;; <-- fetches configuration
4 | (cljs-repl) ;; runs last, after figwheel has been started
--------------------------------------------------------------------------------
/dev/src/user.clj:
--------------------------------------------------------------------------------
1 | (ns user
2 | (:require [clojure.java.io :as io]
3 | [clojure.spec.alpha :as s]
4 | [clojure.spec.gen.alpha :as gen]
5 | [mount.core :as mount :refer [start stop]]
6 | [mount-up.core :as mount-up]
7 | [sinostudy.spec.dictionary :as sd]
8 | [sinostudy.dictionary.core :as d]
9 | [sinostudy.dictionary.load :as load]
10 | [sinostudy.navigation.handler :as handler :refer [dict config server]]
11 | [sinostudy.pinyin.core :as p]))
12 |
13 | (mount-up/on-upndown :info mount-up/log :before) ; NOTE(review): presumably logs each mount state before it starts/stops - confirm against mount-up docs
14 |
15 | (defn restart
16 |   "Cycle one or more pieces of mount state: stop them, then start them again."
17 |   [& states]
18 |   (let [run #(apply % states)]
19 |     (run stop) (run start)))
20 |
21 | (defn look-up*
22 |   "Looks up `term` the way the full app does: the backend look-up followed by
23 |   the frontend post-processing (reduce, then sort). Handy for previewing
24 |   what the frontend search results look like from the REPL."
25 |   [term]
26 |   (let [raw     (d/look-up dict term)
27 |         reduced (d/reduce-result raw)]
28 |     (d/sort-result reduced)))
29 |
--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
1 | (defproject sinostudy "_"
2 |   :description "The sino.study project."
3 |   :url "http://sino.study"
4 |   :min-lein-version "2.8.1"
5 |   :source-paths ["src"]
6 |   :resource-paths ["resources"]
7 |   :jar-name "sinostudy.jar"
8 |   :uberjar-name "sinostudy-standalone.jar"
9 |
10 |   :dependencies [[org.clojure/clojure "1.10.1"]
11 |                  [org.clojure/clojurescript "1.10.520"]
12 |                  [org.clojure/data.csv "0.1.4"]
13 |                  [org.clojure/test.check "0.10.0"]
14 |                  [computerese "0.1.0-SNAPSHOT"]
15 |                  [mount "0.1.16"]
16 |                  [tolitius/mount-up "0.1.2"]
17 |                  [reagent "0.8.1"]
18 |                  [re-frame "0.10.8"]
19 |                  [day8.re-frame/http-fx "0.1.6"]
20 |                  [clj-commons/secretary "1.2.4"]
21 |                  [venantius/accountant "0.2.4"]
22 |                  [com.cognitect/transit-clj "0.8.313"]
23 |                  [com.cognitect/transit-cljs "0.8.256"]
24 |                  [compojure "1.6.1"]
25 |                  [http-kit "2.3.0"]
26 |                  [ring/ring-defaults "0.3.2"]
27 |                  [clj-json "0.5.3"]]
28 |
29 |   :plugins [[me.arrdem/lein-git-version "2.0.8"]
30 |             [lein-cljsbuild "1.1.7"]]
31 |
32 |   :git-version {:version-file "resources/version.edn"
33 |                 :version-file-keys [:tag       ; Name of the last git tag if any
34 |                                     :ahead     ; Number of commits ahead of the last tag, or 0
35 |                                     :ahead?    ; Is the head ahead by more than 0 commits
36 |                                     :ref       ; The full current ref
37 |                                     :ref-short ; The "short" current ref
38 |                                     :branch    ; The name of the current branch
39 |                                     :dirty?    ; Optional. Boolean. Are there un-committed changes.
40 |                                     :message   ; Optional. The last commit message when clean.
41 |                                     :timestamp]} ; Optional. The last commit date when clean.
42 |
43 |   :profiles {:dev {:dependencies [[binaryage/devtools "0.9.10"]
44 |                                   [day8.re-frame/re-frame-10x "0.4.2"]
45 |                                   [figwheel-sidecar "0.5.19"]] ; for Cursive-integrated figwheel REPL
46 |                    :plugins [[lein-figwheel "0.5.19"]] ; for running `lein figwheel dev`
47 |                    :source-paths ["dev/src"]
48 |                    :repl-options {:init-ns user}}
49 |
50 |              :uberjar {:main sinostudy.navigation.handler ; handler ns lives under src/sinostudy/navigation/
51 |                        :aot [sinostudy.navigation.handler]
52 |                        :prep-tasks ["clean"
53 |                                     "compile"
54 |                                     ["cljsbuild" "once" "min"]]}}
55 |
56 |   :clean-targets ^{:protect false} ["resources/public/js/compiled" "target"]
57 |   :figwheel {:css-dirs ["resources/public/css"]}
58 |   :cljsbuild {:builds [{:id "dev"
59 |                         :source-paths ["src"]
60 |                         :figwheel {:on-jsload "sinostudy.core/mount-root"}
61 |                         :compiler {:main sinostudy.core
62 |                                    :output-to "resources/public/js/compiled/app.js"
63 |                                    :output-dir "resources/public/js/compiled/out"
64 |                                    :asset-path "js/compiled/out"
65 |                                    :source-map-timestamp true
66 |                                    :optimizations :none
67 |                                    :closure-defines {"re_frame.trace.trace_enabled_QMARK_" true}
68 |                                    :preloads [devtools.preload
69 |                                               day8.re-frame-10x.preload]
70 |                                    :external-config {:devtools/config {:features-to-install :all}}}}
71 |
72 |                        {:id "min"
73 |                         :source-paths ["src"]
74 |                         :compiler {:main sinostudy.core
75 |                                    :output-to "resources/public/js/compiled/app.js"
76 |                                    :optimizations :advanced
77 |                                    :closure-defines {goog.DEBUG false}
78 |                                    :pretty-print false}}]})
79 |
--------------------------------------------------------------------------------
/resources/config.edn:
--------------------------------------------------------------------------------
1 | {:server {:port {:internal 8080
2 | :external 80}}
3 | :evaluation {:delay 250}}
--------------------------------------------------------------------------------
/resources/public/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/android-chrome-192x192.png
--------------------------------------------------------------------------------
/resources/public/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/android-chrome-512x512.png
--------------------------------------------------------------------------------
/resources/public/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/apple-touch-icon.png
--------------------------------------------------------------------------------
/resources/public/browserconfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | #da532c
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/resources/public/css/main.css:
--------------------------------------------------------------------------------
1 | /* === GLOBAL === */
2 |
3 | * {
4 | font-family: "Gill Sans", "Gill Sans MT", Calibri, "KaiTi", "楷体", STKaiti, "华文楷体", sans-serif;
5 | font-weight: 300;
6 | color: #555;
7 | padding: 0;
8 | margin: 0;
9 | hyphens: auto;
10 |
11 | /* otherwise safari fonts become too thin */
12 | -webkit-font-smoothing: subpixel-antialiased;
13 | }
14 |
15 | /* Hanzi are made a bit darker than latin text to make them stand out */
16 | :lang(zh) {
17 | letter-spacing: 0.4ch;
18 | color: #333;
19 | }
20 |
21 | :lang(en) {
22 | letter-spacing: 0.2ch;
23 | }
24 |
25 | /* https://www.sitepoint.com/understanding-and-using-rem-units-in-css/ */
26 | html {
27 | font-size: 62.5%; /* = 10px (down from 16px) */
28 | }
29 |
30 | html, body {
31 | height: 100%;
32 | }
33 |
34 | a {
35 | /* So that links won't suddenly reset a custom font to the global one. */
36 | font-family: inherit;
37 | color: inherit;
38 | text-decoration: none;
39 | }
40 |
41 | a:hover {
42 | color: #4477DD;
43 | }
44 |
45 | /* headings and content inside them get the serif fonts */
46 | h1, h2, h3, h1 *, h2 *, h3 * {
47 | color: #555555;
48 | font-family: Didot, "Didot LT STD", "Hoefler Text", Garamond, "Times New Roman", "KaiTi", "楷体", STKaiti, "华文楷体", serif;
49 | }
50 |
51 | /* basic document margin */
52 | h1, h2, p, ol, ul, dl, dd, table {
53 | margin-top: 1rem;
54 | margin-bottom: 2rem;
55 | }
56 |
57 | h1 + p {
58 | margin-top: -1rem;
59 | }
60 |
61 | h1 {
62 | font-size: 2.8rem;
63 | }
64 |
65 | h2 {
66 | font-size: 2.4rem;
67 | }
68 |
69 | p {
70 | font-size: 1.8rem;
71 | }
72 |
73 | ol, ul {
74 | margin-left: 4rem;
75 | }
76 |
77 | section + section {
78 | margin-top: 1.5rem;
79 | }
80 |
81 | dt {
82 | font-size: 2.4rem;
83 | }
84 |
85 | li {
86 | font-size: 1.8rem;
87 | margin-bottom: 0.8rem;
88 | }
89 |
90 | table {
91 | font-size: 1.4rem;
92 |
93 | /* remove double border */
94 | border-collapse: collapse;
95 | }
96 |
97 | td, th {
98 | padding: 1rem;
99 | }
100 |
101 | tr {
102 | border-left: 2px solid #DD8888;
103 | border-right: 2px solid #DD8888;
104 | }
105 |
106 | tr:first-child {
107 | border-top: 2px solid #DD8888;
108 | }
109 |
110 | tr:last-child {
111 | border-bottom: 2px solid #DD8888;
112 | }
113 |
114 |
115 |
116 |
117 | /* === GLOBAL CLASSES === */
118 | .pinyin {
119 | color: #DD8888;
120 | font-family: Didot, "Didot LT STD", "Hoefler Text", Garamond, "Times New Roman", "KaiTi", "楷体", STKaiti, "华文楷体", serif;
121 | }
122 |
123 | .pinyin::before {
124 | content: "[";
125 | }
126 |
127 | .pinyin::after {
128 | content: "]";
129 | }
130 |
131 |
132 |
133 |
134 |
135 | /* === ANIMATIONS === */
136 |
137 | @keyframes fade-in {
138 | from {
139 | opacity: 0;
140 | }
141 | to {
142 | opacity: 1;
143 | }
144 | }
145 |
146 |
147 |
148 |
149 | /* === MAIN & HEADER === */
150 |
151 |
152 | div#app {
153 | height: 100%; /* also set for all parent elements */
154 |
155 | display: flex;
156 | flex-direction: column;
157 | align-items: center;
158 | justify-content: space-between;
159 |
160 | background-color: #444;
161 | background-image: radial-gradient(ellipse at bottom, rgba(255,255,255,0.2) 0%, transparent 100%);
162 | box-shadow: 0 0 15rem 0 rgba(0, 0, 0, 0.3) inset;
163 | }
164 |
165 | /* would have styled BODY or HTML instead if React could access them */
166 | main {
167 | /* take up all middle space and allow for scrolling */
168 | height: 100%;
169 | width: calc(100% - 1rem);
170 | max-width: 65rem;
171 | overflow: auto;
172 | padding: 0.5rem;
173 | flex-grow: 1;
174 | }
175 |
176 | main#splash {
177 | display: flex;
178 | flex-direction: column;
179 | align-items: center;
180 | justify-content: center;
181 | }
182 |
183 | main#splash img {
184 | animation: fade-in 0.5s ease;
185 | width: calc(100% - 1rem);
186 | margin: 2rem auto;
187 | }
188 |
189 | main#splash blockquote {
190 | animation: fade-in 1s ease;
191 | font-size: 1.8rem;
192 | line-height: 1.5;
193 | color: #999;
194 | width: calc(100% - 6rem);
195 | border-left: 0.5rem solid #333;
196 | border-right: 0.5rem solid #333;
197 | border-radius: 1rem;
198 | padding: 0 2rem;
199 | text-align: justify;
200 | }
201 |
202 | /* prevent squashing on mobile when the soft-keyboard is open */
203 | @media only screen and (max-height: 25rem) {
204 | main#splash blockquote {
205 | display: none;
206 | }
207 | }
208 |
209 | main#splash a {
210 | color: #DD9999;
211 | font-variant: small-caps;
212 | white-space: nowrap;
213 | }
214 |
215 | main#splash a:hover {
216 | text-decoration: underline;
217 | }
218 |
219 | header {
220 | animation: fade-in 1s ease;
221 |
222 | width: 100%;
223 |
224 | background: #BB5544;
225 | box-shadow: 0 0 1rem 0 rgba(0, 0, 0, 0.3);
226 | background-image: linear-gradient(to top, #BB5544, #CC6644);
227 |
228 | /* make sure search results are covered by shadow */
229 | z-index: 1;
230 | }
231 |
232 | /* TODO: do I need this?
233 | aligner is necessary for limiting width and centering horizontally!
234 | without "min-width:98%;" in .vcenter, .vcenter gets crammed,
235 | so this extra class is necessary to get a max-width.
236 | */
237 | header div#aligner {
238 | max-width: 65rem; /* sync with main */
239 | margin: auto;
240 | padding: 0.5rem;
241 | text-align: center;
242 | vertical-align: middle;
243 |
244 | /* makes logo img able to assume 0 height */
245 | line-height: 0;
246 | }
247 |
248 |
249 | /* Also known as the #study-form */
250 | div#header-input {
251 | display: flex;
252 | }
253 |
254 | /* Formerly known as the #study-input */
255 | div#header-input > input {
256 | animation: fade-in 1s ease;
257 | transition: all 0.3s;
258 |
259 | font-size: 2.4rem;
260 | color: #CCC;
261 |
262 | /* fixes overflow in Chrome iPhone 5/SE device inspector */
263 | max-width: 100%;
264 | box-sizing: border-box;
265 |
266 | padding: 0.5rem 3.5rem 0.5rem 0.5rem;
267 | border: none;
268 | border-radius: 0.3rem;
269 | background: #2A2A2A url("/img/search.svg");
270 | background-repeat: no-repeat;
271 | background-size: auto calc(100% - 1.5rem);
272 | background-position: calc(100% - 0.75rem) 50%;
273 | box-shadow: inset 0 0 0.5rem 0 rgba(0, 0, 0, 1),
274 | 0 0 0.5rem 0 rgba(255, 255, 255, 0.3);
275 | /* Take up full width */
276 | flex: 1;
277 | }
278 |
279 | div#header-input > input:focus {
280 | padding: 0.5rem;
281 | background-position: calc(100% + 3.5rem) 50%;
282 | }
283 |
284 | /* Grey colouring of input when action-chooser is active */
285 | div#header-input > input[disabled],
286 | div#header-input > input::placeholder {
287 | color: #666;
288 | }
289 |
290 | /* Dotted underline for input carrying the "unknown" class */
291 | div#header-input > input.unknown {
292 | text-decoration: underline;
293 | text-decoration-color: #884433;
294 | text-decoration-style: dotted;
295 | }
296 |
297 | /* Formerly known as the #study-button */
298 | div#header-input > button {
299 | border: 0;
300 | padding: 0;
301 | font-size: 0;
302 | width: 0;
303 | }
304 |
305 | header p#title {
306 | color: #FFBBBB;
307 | font-size: 1.8rem;
308 | margin: 1.7rem 0 -0.5rem 0;
309 | animation: fade-in 1s ease;
310 | }
311 |
312 | header p#title em {
313 |     color: #FFBBBB;
314 |     font-weight: bold; /* was `font-style: bold`, which is not a valid font-style value and was ignored */
315 | }
316 |
317 | header p#title + div#filters {
318 | margin-top: 3.2rem;
319 | }
320 |
321 | header p#title + div#filters.hidden {
322 | margin-top: 1.8rem;
323 | }
324 |
325 |
326 |
327 | /* === VERSION NUMBER === */
328 | address {
329 | animation: fade-in 3s ease;
330 | color: #333;
331 | font-size: 1.4rem;
332 | position: absolute;
333 | padding: 1rem;
334 | right: 0;
335 | bottom: 6rem;
336 | transition: all 0.5s;
337 | }
338 |
339 | address.hidden {
340 | opacity: 0;
341 | }
342 |
343 | @media only screen and (max-height: 15rem) {
344 | address {
345 | display: none;
346 | }
347 | }
348 |
349 |
350 |
351 |
352 | /* === FILTERS === */
353 | div#filters {
354 | transition: all 0.2s;
355 | margin: 1.8rem 0 0.7rem 0; /* TODO: weird values here */
356 | font-size: 1.6rem;
357 | color: #FFBBBB; /* for the separating dots */
358 | letter-spacing: 0.2rem;
359 | word-spacing: 0.3rem;
360 | text-align: center;
361 | }
362 |
363 | div#filters.hidden { /* collapsed, invisible state of the filter bar */
364 |     height: 0;
365 |     margin: 0;
366 |     opacity: 0;
367 | }
368 |
369 | div#filters input[type=radio] {
370 | /* hide the actual radio button */
371 | -webkit-appearance: none;
372 | -moz-appearance: none;
373 | -ms-appearance: none;
374 | -o-appearance: none;
375 | appearance: none;
376 | }
377 |
378 | div#filters input[type=radio] + label {
379 | cursor: pointer;
380 | color: white;
381 | animation: fade-in 0.5s ease;
382 | }
383 |
384 | div#filters input[type=radio] + label:hover {
385 | text-decoration: underline;
386 | }
387 |
388 | div#filters input[type=radio]:checked + label {
389 | color: #661111;
390 | }
391 |
392 | div#filters input[type=radio]:checked + label:hover {
393 | text-decoration: none;
394 | cursor: default;
395 | }
396 |
397 |
398 |
399 |
400 | /* === ARTICLE === */
401 |
402 | article {
403 | transition: all 0.2s; /* should be synchronised with filters transition */
404 |
405 | background: white;
406 | border-radius: 0.3rem;
407 | padding: 1.5rem;
408 |
409 | box-sizing: border-box; /* allow 100% width + padding without overflow */
410 | width: 100%;
411 | }
412 |
413 | /* deal with Firefox quirk (bottom padding on main is being ignored) */
414 | @-moz-document url-prefix() {
415 | #entries article:last-child {
416 | margin-bottom: 0.5rem;
417 | }
418 | }
419 |
420 | /* controls where the content appears */
421 | article.full {
422 | /* take up all middle space and allow for scrolling */
423 | flex-grow: 1;
424 | height: 100%; /* enables border all the way down (in tandem with article) */
425 | overflow: auto;
426 | }
427 |
428 |
429 | /* === FOOTER === */
430 |
431 | footer {
432 | animation: fade-in 1.5s ease;
433 |
434 | /* text */
435 | text-align: center;
436 |
437 | /* box */
438 | background: #BB5544;
439 | background-image: linear-gradient(to bottom, #BB5544, #CC6644);
440 | padding: 1.5rem 0;
441 | width: 100%;
442 |
443 | /* make sure search results are covered by shadow */
444 | z-index: 1;
445 |
446 | /* shadow */
447 | box-shadow: 0 0 1rem 0 rgba(0, 0, 0, 0.3);
448 | }
449 |
450 | nav {
451 | /* text-related */
452 | font-size: 1.6rem;
453 | word-spacing: 0.4rem;
454 | line-height: 1;
455 | color: #FFBBBB;
456 | }
457 |
458 | nav > #script-changer {
459 | padding: 0.4rem;
460 | border: 1pt solid #CC6666;
461 | border-radius: 0.5rem;
462 | color: #FFBBBB;
463 | cursor: pointer;
464 |
465 | /* nav links shouldn't jump around when changing script */
466 | display: inline-block;
467 | min-width: 6ch;
468 |
469 | /* fix for #14 (translation pop-up on Chrome mobile) */
470 | user-drag: none;
471 | user-select: none;
472 | }
473 |
474 | nav > #script-changer:hover {
475 | text-decoration: none;
476 | color: white;
477 | border-color: white;
478 | }
479 |
480 | footer a {
481 | color: white;
482 | }
483 |
484 | footer a:hover {
485 | color: white;
486 | text-decoration: underline;
487 | }
488 |
489 | footer a.current-page, footer a.current-page:hover {
490 | color: #661111;
491 | text-decoration: none;
492 | }
493 |
494 |
495 |
496 |
497 | /* === ACTION CHOOSER === */
498 |
499 | fieldset#actions {
500 | /* box */
501 | padding: 1rem;
502 | width: 90%;
503 | max-width: 40rem;
504 | background: #BB5544;
505 | border: none;
506 | border-radius: 1rem;
507 | box-shadow: 0 1rem 2rem 0 rgba(0, 0, 0, 0.2),
508 | 0 1rem 2rem 0 rgba(0, 0, 0, 0.19),
509 | inset 0 0 1rem 0 rgba(0, 0, 0, 0.3);
510 |
511 | /* center on screen; shifted up by 70% of own height (a plain 50% looks off) */
512 | z-index: 2;
513 | position: fixed;
514 | top: 50%;
515 | left: 50%;
516 | -webkit-transform: translate(-50%, -70%);
517 | -moz-transform: translate(-50%, -70%);
518 | -o-transform: translate(-50%, -70%);
519 | -ms-transform: translate(-50%, -70%);
520 | transform: translate(-50%, -70%); /* standard property last so it wins */
521 |
522 | /* expand to fit content
523 | https://teamtreehouse.com/community/how-can-i-make-my-divs-grow-wider-according-to-their-content */
524 | -moz-box-sizing: border-box;
525 | -webkit-box-sizing: border-box;
526 | box-sizing: border-box;
527 |
528 | animation: fade-in 0.3s linear;
529 | }
530 |
531 | fieldset#actions * {
532 | color: #FFBBBB;
533 | }
534 |
535 | /* the action chooser h1 is (unlike other h1 elements) styled sans-serif */
536 | fieldset#actions > legend {
537 | font-family: "Gill Sans", "Gill Sans MT", Calibri, "KaiTi", "楷体", STKaiti, "华文楷体", sans-serif;
538 | font-size: 2.2rem;
539 | text-align: center;
540 |
541 | /* positioning is a bit weird with this thing */
542 | position: relative;
543 | top: 2.2rem;
544 | margin: 2.2rem 0;
545 | padding: 1rem 0;
546 | width: 100%;
547 |
548 | /* displays as a kind of HR */
549 | border-bottom: 1px solid #CC6666;
550 |
551 | /* fake top border of the fieldset itself */
552 | border-top: none;
553 | }
554 |
555 | fieldset#actions > ol {
556 | margin: 0;
557 | padding: 0 0 0 3rem;
558 | }
559 |
560 | fieldset#actions > ol > li > input[type=radio] {
561 | /* hide the actual radio button */
562 | -webkit-appearance: none;
563 | -moz-appearance: none;
564 | -ms-appearance: none;
565 | -o-appearance: none;
566 | appearance: none;
567 | }
568 |
569 | fieldset#actions > ol > li > label {
570 | cursor: pointer;
571 | }
572 |
573 | /* Reveal that the options are also clickable */
574 | fieldset#actions > ol > li > label:hover {
575 | text-decoration: underline;
576 | }
577 |
578 | /* Colour the currently checked button white */
579 | fieldset#actions > ol > li > input[type=radio]:checked + label {
580 | color: white;
581 | }
582 |
583 |
584 |
585 |
586 | /* === DICTIONARY ENTRY === */
587 |
588 | article.entry {
589 | display: flex;
590 | }
591 |
592 | article.entry h1 {
593 | writing-mode: vertical-lr;
594 | text-orientation: upright;
595 | font-size: 8vh; /* vh better supports small screens */
596 | margin-right: 1.5rem;
597 |
598 | /* don't overflow onto usages */
599 | white-space: nowrap;
600 |
601 | /* remove hidden left margin */
602 | line-height: 1;
603 | }
604 |
605 | article.entry div.content {
606 | box-sizing: border-box; /* allow 100% height + padding with not overflow */
607 | width: 100%;
608 | height: 100%;
609 | border-left: 2px solid #EEE;
610 | padding: 0 0 2rem 1.5rem;
611 |
612 | display: flex;
613 | flex-direction: column;
614 |
615 | /* TODO: should I find a way to make entire page scrollable */
616 | overflow: auto;
617 | }
618 |
619 | section#usages {
620 | margin-bottom: auto;
621 | }
622 |
623 | section.details table {
624 | font-size: 1.2rem;
625 | margin-bottom: 0;
626 | }
627 |
628 | section.details table tbody > tr > td:first-child {
629 | white-space: nowrap;
630 | font-weight: 400;
631 | text-align: right;
632 | text-transform: uppercase;
633 | color: #662222;
634 | background: #DD8888;
635 | }
636 |
637 | section.details table *:lang(zh) {
638 | font-size: 2rem;
639 | }
640 |
641 | section.details table td:last-child {
642 | width: 100%;
643 | }
644 |
645 |
646 |
647 | /* === DICTIONARY SEARCH RESULT === */
648 |
649 | #entries article a {
650 | display: flex;
651 | flex-direction: row;
652 | align-items: stretch;
653 | height: 100%;
654 | }
655 |
656 | #entries article + article {
657 | margin-top: 0.5rem;
658 | }
659 |
660 | #entries h1 {
661 | writing-mode: vertical-lr;
662 | text-orientation: upright;
663 | white-space: nowrap;
664 | align-self: center;
665 | font-size: 3.2rem;
666 | margin: 0;
667 | padding-right: 1rem;
668 | }
669 |
670 | #entries article:hover {
671 | background: #EEFFFF;
672 | }
673 |
674 | #entries h1 + dl {
675 | /* vertical rule between the h1 and the list that follows it */
676 | border-left: 2px solid #EEE;
677 | list-style-type: none;
678 | padding: 0 0 0 1rem; /* left padding only (made a separate padding-left dead) */
679 | margin: 0;
680 | }
681 |
682 | /* TODO: check this */
683 | #entries span.pinyin {
684 | color: #999999;
685 | }
686 |
687 | #entries dl > dt {
688 | font-size: 1.8rem;
689 | }
690 |
691 | #entries dl > dd {
692 | font-size: 1.8rem;
693 | margin-top: 1rem;
694 | }
695 |
696 | .understated {
697 | color: #CCC;
698 | }
699 |
700 | .understated em {
701 | color: initial;
702 | font-style: normal;
703 | }
704 |
--------------------------------------------------------------------------------
/resources/public/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/favicon-16x16.png
--------------------------------------------------------------------------------
/resources/public/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/favicon-32x32.png
--------------------------------------------------------------------------------
/resources/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/favicon.ico
--------------------------------------------------------------------------------
/resources/public/favicon_generator.md:
--------------------------------------------------------------------------------
1 | # Your Favicon Package
2 |
3 | This package was generated with [RealFaviconGenerator](https://realfavicongenerator.net/) [v0.16](https://realfavicongenerator.net/change_log#v0.16)
4 |
5 | ## Install instructions
6 |
7 | To install this package:
8 |
9 | Extract this package in the root of your web site. If your site is `http://www.example.com`, you should be able to access a file named `http://www.example.com/favicon.ico`.
10 |
11 | Insert the following code in the `head` section of your pages:
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | *Optional* - Check your favicon with the [favicon checker](https://realfavicongenerator.net/favicon_checker)
--------------------------------------------------------------------------------
/resources/public/html_code.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/resources/public/img/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/img/favicon.png
--------------------------------------------------------------------------------
/resources/public/img/favicon.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
128 |
--------------------------------------------------------------------------------
/resources/public/img/logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
98 |
--------------------------------------------------------------------------------
/resources/public/img/logo_dark.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
98 |
--------------------------------------------------------------------------------
/resources/public/img/logo_dark_min.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
127 |
--------------------------------------------------------------------------------
/resources/public/img/logo_min.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
127 |
--------------------------------------------------------------------------------
/resources/public/img/search.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
64 |
--------------------------------------------------------------------------------
/resources/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | sino·study
17 |
18 |
19 |
20 |
21 |
22 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/resources/public/mstile-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simongray/sino.study/b1b2954011841bc96449a1aa61eb51656930aee5/resources/public/mstile-150x150.png
--------------------------------------------------------------------------------
/resources/public/safari-pinned-tab.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/resources/public/site.webmanifest:
--------------------------------------------------------------------------------
1 | {
2 | "name": "sino\u00b7study",
3 | "short_name": "sino\u00b7study",
4 | "icons": [
5 | {
6 | "src": "/android-chrome-192x192.png",
7 | "sizes": "192x192",
8 | "type": "image/png"
9 | },
10 | {
11 | "src": "/android-chrome-512x512.png",
12 | "sizes": "512x512",
13 | "type": "image/png"
14 | }
15 | ],
16 | "theme_color": "#fcfcfb",
17 | "background_color": "#fcfcfb",
18 | "start_url": "http://sino.study",
19 | "display": "standalone"
20 | }
21 |
--------------------------------------------------------------------------------
/src/sinostudy/cofx.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.cofx
2 | (:require [clojure.string :as str]
3 | [re-frame.core :as rf]))
4 |
5 | (rf/reg-cofx
6 | ::now
7 | (fn [cofx _]
8 | (assoc cofx ::now (js/Date.))))
9 |
10 | ;; Retrieves scroll states for all tags defined by the selector string.
11 | ;; In the current design, the window/document itself is no longer scrollable,
12 | ;; so there is no need to retrieve its scroll state.
13 | (rf/reg-cofx
14 |   ::scroll-state
15 |   (fn [cofx _]
16 |     (let [selector          "*[id], main, body"
17 |           elements          (array-seq (js/document.querySelectorAll selector))
18 |           ;; `.-id` is "" (never nil) when unset, so blanks must be dropped.
19 |           element->selector (fn [element]
20 |                               (->> [(.-tagName element) (.-id element)]
21 |                                    (remove str/blank?)
22 |                                    (str/join "#")))
23 |           scroll-state      (into {} (for [element elements
24 |                                            :let [x (.-scrollLeft element)
25 |                                                  y (.-scrollTop element)]
26 |                                            :when (or (> x 0) (> y 0))]
27 |                                        [(element->selector element) [x y]]))]
28 |       (assoc cofx ::scroll-state scroll-state))))
29 |
30 | (rf/reg-cofx
31 | ::active-element
32 | (fn [cofx _]
33 | (assoc cofx ::active-element (.-activeElement js/document))))
34 |
35 | (rf/reg-cofx
36 | ::pathname
37 | (fn [cofx _]
38 | (assoc cofx ::pathname (js/decodeURIComponent js/window.location.pathname))))
39 |
40 | (rf/reg-cofx
41 | ::local-storage
42 | (fn [cofx key]
43 | (assoc cofx ::local-storage (js->clj (.getItem js/localStorage key)))))
44 |
--------------------------------------------------------------------------------
/src/sinostudy/config.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.config)
2 |
3 | ;; Allows for certain constants to be defined at compile time,
4 | ;; e.g. if debug? is false the production URI should be used.
5 | ;; See: :closure-defines in project.clj
6 | (def debug?
7 | ^boolean goog.DEBUG)
8 |
--------------------------------------------------------------------------------
/src/sinostudy/core.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.core
2 | (:require [reagent.core :as reagent]
3 | [re-frame.core :as rf]
4 | [day8.re-frame.http-fx]
5 | [secretary.core :as secretary]
6 | [sinostudy.navigation.routes :as routes]
7 | [sinostudy.events.core :as events]
8 | [sinostudy.events.actions :as actions]
9 | [sinostudy.subs :as subs]
10 | [sinostudy.views.core :as views]
11 | [sinostudy.config :as config]))
12 |
13 | (defn dev-setup []
14 | (when config/debug?
15 | (enable-console-print!)
16 | (println "dev mode")))
17 |
18 | (defn mount-root []
19 | (rf/clear-subscription-cache!)
20 | (reagent/render [views/app] (.getElementById js/document "app"))
21 |
22 | ;; Start the CLJS app from current page in the address bar.
23 | ;; The routing mostly takes place on the frontend,
24 | ;; so the app needs to orient itself on hard page loads.
25 | (let [current-page (-> js/window .-location .-pathname)]
26 | (secretary/dispatch! current-page))
27 |
28 | ;; The input bar needs to have immediate focus on page load.
29 | (.focus (.getElementById js/document "input-field"))
30 |
31 | ;; Intercepts all key presses in the document.
32 | ;; Only differs from normal operation in the action-chooser mode.
33 | ;; This is important, since calling .preventDefault on all key presses
34 | ;; is a recipe for creating many bugs -- now and down the line, too.
35 | (set! (.-onkeydown js/document)
36 | (fn [e] (when @(rf/subscribe [::subs/actions])
37 | (.preventDefault e)
38 | (rf/dispatch [::actions/on-key-down (.-key e)])))))
39 |
40 | (defn ^:export init []
41 | (routes/app-routes)
42 | (rf/dispatch-sync [::events/initialize-db])
43 | (dev-setup)
44 | (mount-root))
45 |
--------------------------------------------------------------------------------
/src/sinostudy/db.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.db
2 | (:require [cljs.reader :as reader]
3 | [sinostudy.config :as cf]
4 | [sinostudy.navigation.pages :as pages])
5 | (:require-macros [sinostudy.macros.core :as macros]))
6 |
7 | (def config
8 | (reader/read-string (macros/slurp "resources/config.edn")))
9 |
10 | ;; TODO: fix this so that running a JAR locally will still work
11 | (def query-uri
12 | (let [hostname js/window.location.hostname
13 | port (if cf/debug?
14 | (get-in config [:server :port :internal])
15 | (get-in config [:server :port :external]))]
16 | (str "http://" hostname ":" port "/query")))
17 |
18 | ;; TODO: these are views, move to appropriate ns
19 | (def static-pages
20 | {"/404" [:main
21 | [:article.full
22 | [:h1 "Sorry,"]
23 | [:p "that page doesn't exist."]]]
24 | "/" [:main#splash
25 | [:img {:src "/img/logo_dark_min.svg"}]
26 | [:blockquote
27 | "... a modern Chinese dictionary and grammar tool. "
28 | "Here you can look up unknown words or find out what is going on in a sentence. "
29 | [:a {:href "/about"
30 | :title "Learn more about sino.study"}
31 | "Learn More."]]]
32 | "/about" [:main
33 | [:article.full
34 | [:h1 "About"]
35 | [:p "This is the About page."]]]
36 | "/settings" [:main
37 | [:article.full
38 | [:h1 "Settings"]
39 | [:p "This is the Settings page."]]]})
40 |
41 | ;; When used in conjunction with `sorted-set-by`, this comparator can be used to
42 | ;; get the functionality of a set, but ordered by time for occasional trimming.
43 | (defn- timestamp-comparator
44 |   "Compare x and y by the :ts found in their metadata; equal values tie."
45 |   [x y]
46 |   (if (= x y)
47 |     0
48 |     (let [stamp #(:ts (meta %))]
49 |       (compare (stamp x) (stamp y)))))
50 |
51 | (def initial-db
52 | "This is the db map used as the initial state of the db."
53 | {;; The current contents of the text input field. Shown directly in the UI.
54 | ;; This usually just reflects what the user is typing in, but can also be
55 | ;; affected by conversion operations, e.g. `digits->diacritics`.
56 | :input nil
57 |
58 | ;; A page is basically a 2-tuple describing a URL in the SPA. They can
59 | ;; be directly translated into the full web browser location of a page.
60 | ;; There are two types of page:
61 | ;;
62 | ;; * Static pages that are part of the root domain, e.g. sino.study/about
63 | ;; * Dynamically generated dictionary terms that appear as a sublevel of
64 | ;; sino.study/terms. For example, sino.study/terms/你好.
65 | :pages {::pages/terms {}
66 | ::pages/static static-pages}
67 |
68 | ;; A basic history of the pages that have been navigated to.
69 | ;; Not actually used for generating content, since the in-browser navigation
70 | ;; history is sufficient to recreate pages as the page rendered is simply a
71 | ;; function of the URI.
72 | :history '()
73 |
74 | ;; A set of all unknown queries, i.e. queries that didn't resolve to anything
75 | ;; through a backend request. This is used to memoise those queries for
76 | ;; performance optimisation reasons, but also as a quick way to underline bad
77 | ;; queries in the text input, possibly highlighting spelling mistakes.
78 | :unknown-queries #{}
79 |
80 | ;; A request queue is simply a pattern for avoiding doing multiple identical
81 | ;; backend requests at the same time, e.g. maybe there's a slow connection so
82 | ;; the user manages to click the same link multiple times or spam ENTER.
83 | ;; The queue avoids this by enqueuing requests and then dequeuing them when they
84 | ;; eventually return. This is another performance optimisation.
85 | :queue (sorted-set-by timestamp-comparator)
86 |
87 | ;; Result filters are a mapping from terms to user-selected result filters.
88 | ;; These filters are the ones that control whether we're searching for
89 | ;; plain Pinyin, English, the official Pinyin with diacritics, or the popular
90 | ;; online version where tone diacritics have been replaced by digits.
91 | ;; Terms where the user never deviated from the default choice of filter
92 | ;; do not appear in this map, only the ones that were actively selected.
93 | ;; These are (in a similar fashion to :scroll-states) used to recreate UI
94 | ;; state when navigating back in history during the browsing session.
95 | :result-filters {}
96 |
97 | ;; A stack of maps containing evaluations, i.e. maps of input query, output
98 | ;; actions, and timestamp. Basically used to memoise query input to its
99 | ;; results to speed up recollection.
100 | :evaluations '()
101 |
102 | ;; Holds a record of the query content. Not used for much at the moment, but
103 | ;; can be used to see the history of backend requests and whether they were
104 | ;; successful or not.
105 | :queries '()
106 |
107 | ;; The preferred script is registered here and this will simply use the
108 | ;; selected script over the other whenever there is an option of both in the
109 | ;; UI. This currently doesn't change the term *itself* on term pages, as this
110 | ;; would also require mutating the URL whenever the user switches the script.
111 | :script :simplified
112 |
113 | ;; Scroll states is an in-memory collection of the scroll state of the page
114 | ;; whenever a new page is reached. Since this is an SPA, the browser doesn't
115 | ;; necessarily remember how far along the page was scrolled at a specific
116 | ;; point in the browsing history. To remedy this, the states here can be
117 | ;; recreated. This is then tied in with the page navigation mechanism.
118 | :scroll-states {}
119 |
120 | ;; The action chooser is a pop-in window that can be used to select between
121 | ;; multiple different actions. The window appears spontaneously when a piece
122 | ;; of user input can have multiple interpretations and the user needs to filter
123 | ;; it. In this case `actions` is a vector of possible action options and
124 | ;; `checked-action` is the index of the currently selected option.
125 | :actions nil
126 | :checked-action 0
127 |
128 | ;; This is the content of the `config.edn` file that is read at launch.
129 | ;; While the content is mostly relevant for the backend, this can be used
130 | ;; to monitor this information in the frontend UI, e.g. for debugging
131 | ;; purposes. The `query-uri` is simply the bit of the config that defines
132 | ;; which URI to send backend queries to.
133 | :config config
134 | :query-uri query-uri})
135 |
--------------------------------------------------------------------------------
/src/sinostudy/dictionary/core.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.dictionary.core
2 | (:require [clojure.set :as set]
3 | [clojure.string :as str]
4 | [sinostudy.pinyin.core :as p]
5 | [sinostudy.dictionary.data :as data]
6 | [sinostudy.dictionary.embed :as embed]))
7 |
8 | ;;;; GENERAL STUFF
9 |
10 | (defn pinyin-key
11 |   "Normalise a CC-CEDICT Pinyin string for use as a map key.
12 |   Apostrophes, spaces, middle dots and commas are dropped; the rest is
13 |   lower-cased."
14 |   [s]
15 |   (let [separators #"[' ·,]"] ; apostrophe, space, middle dot, comma
16 |     (-> s
17 |         (str/replace separators "")
18 |         str/lower-case)))
19 |
20 |
21 | ;;;; EMBEDDING MANIPULATION
22 |
23 | (defn refr->m
24 |   "Parse an embedded reference of the form \"trad|simp[pin1 yin1]\" into a map.
25 |   When only a single hanzi form is given it is used for both scripts."
26 |   [refr]
27 |   (let [[hanzi-str pinyin-str]   (str/split refr #"\[|\]")
28 |         ;; the second form may be absent, in which case traditional is reused
29 |         [traditional simplified] (str/split hanzi-str #"\|")
30 |         syllables                (str/split pinyin-str #" ")]
31 |     {:traditional traditional
32 |      :simplified  (or simplified traditional)
33 |      :pinyin      syllables}))
34 |
35 |
36 | ;;;; DEALING WITH CLASSIFIERS
37 |
38 | (defn cl-def?
39 | "Determine if a dictionary definition is actually a list of classifiers."
40 | [definition]
41 | (str/starts-with? definition "CL:"))
42 |
43 | (defn has-cls?
44 | "Determine if the listing's :definitions contain classifiers."
45 | [listing]
46 | (some cl-def? (:definitions listing)))
47 |
48 | (defn detach-cls
49 |   "Move the classifiers of a listing from :definitions to :classifiers.
50 |   A listing without any parseable classifier reference is returned as is."
51 |   [listing]
52 |   (let [defs    (:definitions listing)
53 |         cl-defs (set (filter cl-def? defs)) ; set/difference expects sets
54 |         get-cls (comp (partial map refr->m) (partial re-seq embed/refr))
55 |         cls     (set (mapcat get-cls cl-defs))]
56 |     ;; (seq cls), as an empty set is truthy and would never reach the else
57 |     (if (seq cls)
58 |       (-> listing
59 |           (assoc :definitions (set/difference defs cl-defs))
60 |           (assoc :classifiers cls))
61 |       listing)))
62 |
63 |
64 | ;;;; UNIFIED HANZI DICT (TRADITIONAL + SIMPLIFIED)
65 |
66 | (defn hanzi-entry
67 |   "Build the hanzi dictionary entry of a CC-CEDICT listing for one script.
68 |   :term, :scripts and :uses are always present; the remaining keys are only
69 |   attached when the listing holds a truthy value for them."
70 |   [script listing]
71 |   (let [info      (get-in listing [:info script])
72 |         differs?  (not= (:traditional listing) (:simplified listing))
73 |         ;; maps the *other* script to a set holding this term's form in it
74 |         variation (fn []
75 |                     (let [other (case script
76 |                                   :traditional :simplified
77 |                                   :simplified :traditional)]
78 |                       {other #{(get listing other)}}))
79 |         optional  (fn [entry k v] (if v (assoc entry k v) entry))]
80 |     ;; mandatory keys first, then each optional key in turn
81 |     (-> {:term    (get listing script)
82 |          :scripts #{script}
83 |          :uses    {(:pinyin listing) (:definitions listing)}}
84 |         (cond-> differs? (assoc :variations (variation)))
85 |         (optional :classifiers (:classifiers listing))
86 |         (optional :frequency (:frequency listing))
87 |         (optional :decomposition (:decomposition info))
88 |         (optional :etymology (:etymology info))
89 |         (optional :radical (:radical info)))))
90 |
91 | (defn add-hanzi*
92 |   "Update the hanzi dict at the specified key k with the entry v.
93 |   Without an existing entry, v is inserted as is. Otherwise the set-like keys
94 |   are unioned and the scalar keys of v overwrite those of the old entry;
95 |   keys whose new value would be nil or false are left as they were."
96 |   [dict k v]
97 |   (if-let [old (get dict k)]
98 |     (let [union-of (fn [attr] (set/union (attr old) (attr v)))
99 |           merge-of (fn [attr] (merge-with set/union (attr old) (attr v)))
100 |           ;; values combined from both the old and the new entry
101 |           combined {:scripts     (union-of :scripts)
102 |                     :classifiers (union-of :classifiers)
103 |                     :uses        (merge-of :uses)
104 |                     :variations  (merge-of :variations)}
105 |           ;; values where the new entry simply wins
106 |           replaced (select-keys v [:frequency
107 |                                    :decomposition
108 |                                    :etymology
109 |                                    :radical])
110 |           ;; nil/false values never overwrite what the old entry has
111 |           truthy   (fn [m] (into {} (filter val) m))]
112 |       (assoc dict k (merge old (truthy combined) (truthy replaced))))
113 |     (assoc dict k v)))
114 |
115 | (defn add-hanzi
116 |   "Insert or merge a listing's traditional, then simplified, hanzi entry."
117 |   [dict listing]
118 |   (reduce (fn [acc script]
119 |             (add-hanzi* acc (get listing script) (hanzi-entry script listing)))
120 |           dict [:traditional :simplified]))
121 |
122 |
123 | ;;;; PINYIN DICT
124 |
125 | ;; used by both add-pinyin and add-english
126 | (defn add
127 |   "Associate k with v in dict; when k is already present, the old and new
128 |   values are combined with set/union."
129 |   [dict k v]
130 |   (let [existing (get dict k)]
131 |     (assoc dict k (if existing (set/union existing v) v))))
132 |
133 | (defn pinyin-entry
134 | "Make a pinyin dictionary entry based on a CC-CEDICT listing."
135 | [listing]
136 | (hash-set (:traditional listing) (:simplified listing)))
137 |
138 | (defn add-pinyin
139 | "Add an entry to a pinyin dictionary from a CC-CEDICT listing."
140 | [key-type dict listing]
141 | (let [k (get listing key-type)
142 | v (pinyin-entry listing)]
143 | (add dict k v)))
144 |
145 |
146 | ;;;; ENGLISH DICT
147 |
148 | (defn remove-embedded
149 |   "Removes embedded CC-CEDICT information from string s, i.e. everything
150 |   matched by embed/refr, embed/hanzi or embed/pinyin (applied in that order)."
151 |   [s]
152 |   (reduce (fn [text pattern] (str/replace text pattern ""))
153 |           s
154 |           [embed/refr embed/hanzi embed/pinyin]))
155 |
156 | ;; Explanatory parentheses, i.e. description preceding/following a definition.
157 | (def expl
158 | #"^\([^)]+\)|\([^)]+\)$")
159 |
160 | (defn english-keys
161 |   "Derive the English dictionary keys for the definitions of a listing.
162 |   Explanatory parentheses (see expl) are stripped before keys are extracted.
163 |   Tokens consisting purely of digits are dropped.
164 |   Stop-words are dropped too, except where one is itself a full definition
165 |   or what remains of a verblike once its leading 'to ' has been removed."
166 |   [definitions]
167 |   (let [clean        (fn [definition]
168 |                        (-> definition
169 |                            (str/replace expl "")
170 |                            (str/trim)
171 |                            (str/lower-case)))
172 |         numeric?     (fn [word] (re-find #"^[0-9]+$" word))
173 |         definitions* (into #{} (map clean) definitions)
174 |         ;; every individual token of every definition
175 |         single-words (into #{}
176 |                            (comp (map remove-embedded)
177 |                                  (mapcat #(str/split % #"[^a-z0-9-']+"))
178 |                                  (remove str/blank?)
179 |                                  (remove numeric?))
180 |                            definitions*)
181 |         ;; definitions such as "to laugh", minus the leading "to "
182 |         verblikes    (into #{}
183 |                            (comp (filter #(str/starts-with? % "to "))
184 |                                  (map #(subs % 3)))
185 |                            definitions*)
186 |         ;; stop-words that must survive are taken out of the removal set
187 |         stopwords*   (set/difference data/stopwords definitions* verblikes)
188 |         candidates   (set/union definitions* single-words verblikes)]
189 |     (set/difference candidates stopwords*)))
190 |
191 | ;; Used on the backend for limiting results.
192 | ;; (Indirectly) used on the frontend when sorting results.
193 | (defn- english-relevance-score
194 |   "Calculates a basic relevance score based on the basic rule of term:use ratio
195 |   as well as a few heuristics. All comparisons are done in lower case.
196 |   The score is 0 when use does not contain term, or when use is empty.
197 |
198 |   Current heuristics:
199 |     * ratio where explanatory parentheses are normalised to the same length: _
200 |     * ratio with prefixed 'to ' removed (common marker of verblikes)"
201 |   [term use]
202 |   (let [to              #"^to "
203 |         normalised-expl "_"
204 |         term*           (str/lower-case term)
205 |         use*            (str/lower-case use)
206 |         ;; Length ratio of the term to a variant of the use; 0 when the
207 |         ;; variant lost the term or was reduced to the degenerate value.
208 |         ratio           (fn [variant degenerate]
209 |                           (if (and (str/includes? variant term*)
210 |                                    (not= variant degenerate))
211 |                             (/ (count term*) (count variant))
212 |                             0))]
213 |     ;; The (seq use*) guard avoids dividing by zero: an empty use contains
214 |     ;; an empty term, which used to reach (/ 0 0).
215 |     (if (and (seq use*) (str/includes? use* term*))
216 |       (max
217 |         ;; Basic ratio comparison
218 |         (ratio use* "")
219 |         ;; Ratio comparison with explanatory parentheses normalised
220 |         (ratio (str/replace use* expl normalised-expl) normalised-expl)
221 |         ;; Ratio comparison with prefixed "to " removed
222 |         (ratio (str/replace use* to "") ""))
223 |       0)))
224 |
225 | ;; Decides which entries to include for English search results.
226 | ;; Really just an arbitrary value, but 0.33 seems to be a fair cutoff!
227 | (def relevance-cutoff
228 | 0.33)
229 |
230 | (defn- above-cutoff?
231 |   "Does the best relevance score of english-key against any of the given
232 |   definitions exceed relevance-cutoff? Yields nil without definitions."
233 |   [definitions english-key]
234 |   (let [score-of #(english-relevance-score english-key %)]
235 |     (when-let [scores (seq (map score-of definitions))]
236 |       (> (apply max scores)
237 |          relevance-cutoff))))
238 |
239 | (defn add-english
240 |   "Add an entry to the English dictionary from a CC-CEDICT listing.
241 |   Only keys that pass above-cutoff? for the listing's definitions are added,
242 |   which limits the size of the results list. Each such key is mapped to the
243 |   set of the listing's traditional and simplified forms."
244 |   [dict listing]
245 |   (let [definitions (:definitions listing)
246 |         relevant?   (partial above-cutoff? definitions)
247 |         forms       (hash-set (:traditional listing) (:simplified listing))]
248 |     ;; fold every relevant key into the dictionary, merging on clashes
249 |     (reduce (fn [acc k]
250 |               (add acc k forms))
251 |             dict
252 |             (filter relevant? (english-keys definitions)))))
253 |
254 | ;; Used on the frontend for sorting results.
255 | ;; Note that this - in addition to basic relevance - also considers frequency.
256 | (defn english-relevance
257 |   "Calculate the relevance of entry based on an English word as the search term.
258 |   The relevance is a score from 0 to ~1, higher being more relevant.
259 |   Relevance is able to exceed 1 slightly, as word frequency is also added to the
260 |   score, allowing for more accurate sorting (it is a number from 0 to 1 that
261 |   tends towards 0). This is what puts e.g. 句子 ahead of 语句 for 'sentence'."
262 |   [term entry]
263 |   (let [uses (->> (vals (:uses entry))
264 |                   (apply set/union))
265 |         score (partial english-relevance-score term)
266 |         scores (map score uses)
267 |         ;; Seeded with 0: (apply max ...) on an empty seq would throw.
268 |         max-score (apply max 0 scores)
269 |         freq (get entry :frequency 0)]
270 |     ;; Only the best score counts, so many weak uses don't weigh a word down.
271 |     (+ max-score freq)))
272 |
273 | ;;;; FREQUENCY DICTIONARY
274 |
275 | (defn add-freq
276 |   "Attach the word frequency (not char frequency) to a listing as :frequency.
277 |   The higher of the frequencies found for the traditional and the simplified
278 |   form is used; the listing is returned as is when that is not above 0."
279 |   [freq-dict listing]
280 |   (let [lookup    (fn [script] (get freq-dict (get listing script) 0))
281 |         frequency (max (lookup :traditional) (lookup :simplified))]
282 |     (cond-> listing
283 |       (> frequency 0) (assoc :frequency frequency))))
284 |
285 | ;;; TODO: find proper thresholds for labels
286 | (defn frequency-label
287 |   "Get human-readable label (:high, :medium or :low) for a word frequency."
288 |   [frequency]
289 |   (cond
290 |     (> frequency 0.01)  :high
291 |     (> frequency 0.001) :medium ; includes 0.01, which used to end up :low
292 |     :else               :low))
293 |
294 | ;;;; CHARACTER ETYMOLOGY, DECOMPOSITION, ETC.
295 |
296 | (defn add-info*
297 |   "Helper function for add-info. Copies the decomposition, etymology (with
298 |   keywordised keys) and radical found in makemeahanzi for the listing's term
299 |   in the given script to [:info script]; values not found are skipped."
300 |   [script makemeahanzi listing]
301 |   (if-let [info (get makemeahanzi (get listing script))]
302 |     (let [etymology (when-let [raw (get info "etymology")]
303 |                       (into {} (map (fn [[k v]] [(keyword k) v])) raw))
304 |           details   {:decomposition (get info "decomposition")
305 |                      :etymology     etymology
306 |                      :radical       (get info "radical")}]
307 |       (reduce-kv (fn [acc k v]
308 |                    (if v (assoc-in acc [:info script k] v) acc))
309 |                  listing
310 |                  details))
311 |     listing))
312 |
313 | (defn add-info
314 | "Add info from makemeahanzi to a CC-CEDICT listing."
315 | [makemeahanzi listing]
316 | (->> listing
317 | (add-info* :traditional makemeahanzi)
318 | (add-info* :simplified makemeahanzi)))
319 |
320 |
321 | ;;;; CREATING DICTS AND LOOKING UP WORDS
322 |
(defn make-report
  "Create some rudimentary statistics about the given dict:
  the number of keys found in each of its look-up maps."
  [dict]
  (let [size (fn [dict-type] (count (keys (get dict dict-type))))]
    {:entry-count             (size :hanzi)
     :english-count           (size :english)
     :pinyin-count            (size :pinyin)
     :pinyin+digits-count     (size :pinyin+digits)
     :pinyin+diacritics-count (size :pinyin+diacritics)}))
331 |
332 | ;; TODO: also add listings only found in makemeahanzi (e.g. 忄)
(defn create-dict
  "Load the contents of a CC-CEDICT dictionary file into Clojure maps.

  Every listing first has its classifiers detached and then gets its word
  frequency (from freq-dict) and character info (from makemeahanzi) added.
  The prepared listings are then reduced into one map per look-up type
  (:hanzi, :english, :pinyin, :pinyin+digits, :pinyin+diacritics).
  A :report with statistics about those maps is included in the result."
  [listings freq-dict makemeahanzi]
  (let [prepare   (comp (partial add-info makemeahanzi)
                        (partial add-freq freq-dict)
                        detach-cls)
        listings* (map prepare listings)
        index     (fn [f] (reduce f {} listings*))
        by-pinyin (fn [k] (index (partial add-pinyin k)))
        dict      {:hanzi             (index add-hanzi)
                   :english           (index add-english)
                   :pinyin            (by-pinyin :pinyin-key)
                   :pinyin+digits     (by-pinyin :pinyin+digits-key)
                   :pinyin+diacritics (by-pinyin :pinyin+diacritics-key)}]
    (assoc dict :report (make-report dict))))
351 |
(defn look-up
  "Look up the specified term in each dictionary type.

  For the Pinyin-based look-ups, both the raw search term and its pinyin-key
  version are looked up and the results merged, e.g. 'ding zuo' also gets
  'dingzuo'. The English look-up uses the lower-cased term.

  Limit (optional) is a set of accepted result types; other types are not
  looked up. The result always holds the :term itself; nil is returned when
  nothing else was found."
  ([dict term limit]
   (let [unspaced (pinyin-key term)
         allowed  (fn [dict-type] (if limit (get limit dict-type) dict-type))
         fetch    (fn [dict-type word] (get (get dict dict-type) word))
         fetch+   (fn [dict-type]
                    (let [dict-type* (allowed dict-type)]
                      (set/union (fetch dict-type* term)
                                 (fetch dict-type* unspaced))))
         ;; All look-ups except :hanzi yield words that are then resolved
         ;; into actual entries through the :hanzi map.
         entries  (fn [words] (set (map (partial fetch :hanzi) words)))
         found    {:hanzi             (fetch (allowed :hanzi) term)
                   :pinyin            (fetch+ :pinyin)
                   :pinyin+digits     (fetch+ :pinyin+digits)
                   :pinyin+diacritics (fetch+ :pinyin+diacritics)
                   :english           (fetch (allowed :english)
                                             (str/lower-case term))}
         result   (reduce (fn [acc k]
                            (if-let [v (get found k)]
                              (assoc acc k (if (= k :hanzi) v (entries v)))
                              acc))
                          {:term term}
                          [:hanzi :pinyin :pinyin+digits :pinyin+diacritics :english])]
     (when (not= result {:term term})
       result)))
  ([dict word]
   (look-up dict word nil)))
381 |
382 |
383 | ;;;; POST-PROCESSING DICTIONARY LOOK-UP RESULTS
384 |
(defn- safe-comparator
  "Create a comparator for sorting that will not lose items by accident.
  Elements are compared by (fn1 x) first; only when that comparison is a tie
  does the comparison of (fn2 x) decide the order.
  Based on example at: https://clojuredocs.org/clojure.core/sorted-set-by"
  [fn1 fn2]
  (fn [x y]
    (let [primary (compare (fn1 x) (fn1 y))]
      (if (zero? primary)
        (compare (fn2 x) (fn2 y))
        primary))))
395 |
(defn defs-containing-term
  "Only keep definitions that contain the given term.

  The term must appear as a whole word/phrase (case-insensitive): preceded by
  the start of the definition, a space, an opening parenthesis or a quote and
  followed by the end of the definition, a space, common punctuation, a
  closing parenthesis or a quote. Embedded content is removed from each
  definition before matching."
  [term definitions]
  (let [;; Escape regex metacharacters so that a term such as \"c++\" or \"(\"
        ;; is matched literally rather than changing or breaking the pattern.
        literal    (str/replace term #"[.*+?^${}()|\[\]\\]" #(str "\\" %))
        term-re    (re-pattern (str "(?i)(^|[ (\"])" literal "($|[ ,;.'!?)\"])"))
        with-term? (fn [definition]
                     (re-find term-re (remove-embedded definition)))]
    (filter with-term? definitions)))
403 |
(defn filter-defs
  "Remove definitions from entries if they do not contain the given term.
  Used to filter results by an English search term.
  Uses (Pinyin -> definitions) left without any definitions are dropped."
  [term entries]
  (let [prune (fn [uses]
                (into {}
                      (keep (fn [[pinyin definitions]]
                              (let [kept (defs-containing-term term definitions)]
                                (when (seq kept)
                                  [pinyin kept]))))
                      uses))]
    (map (fn [entry] (assoc entry :uses (prune (:uses entry))))
         entries)))
416 |
(defn filter-uses
  "Remove uses from entries if the Pinyin does not match the given term.
  Used to filter results by a Pinyin search term.
  Both the term and the Pinyin of each use are compared in pinyin-key form.
  An optional normalisation function f can be supplied to convert the uses
  (normally in pinyin+digits format) to a Pinyin format matching the term."
  ([term entries f]
   (let [normalise (or f identity)
         matches?  (fn [[pinyin _]]
                     (= (pinyin-key (normalise pinyin))
                        (pinyin-key term)))]
     (map (fn [entry]
            (assoc entry :uses (into {} (filter matches?) (:uses entry))))
          entries)))
  ([term entries]
   (filter-uses term entries nil)))
432 |
(defn reduce-result
  "Reduce the content of a dictionary look-up result.
  This removes irrelevant data from the result relative to the search term,
  e.g. removes uses whose Pinyin does not match the search term.

  When the result holds a direct :hanzi match, that single dictionary entry
  (not a sequence!) is returned instead of the search result."
  [result]
  (let [{:keys [term hanzi pinyin]} result
        digits     (:pinyin+digits result)
        diacritics (:pinyin+diacritics result)]
    (if hanzi
      ;; The search term consisted of hanzi and matched an entry directly.
      hanzi
      ;; Otherwise, return the (filtered) search results for the term.
      ;; The uses are stored with digits, so they are converted to the
      ;; format of the search term before being compared.
      (cond-> result
        pinyin     (assoc :pinyin
                          (filter-uses term pinyin p/no-digits))
        digits     (assoc :pinyin+digits
                          (filter-uses term digits))
        diacritics (assoc :pinyin+diacritics
                          (filter-uses term diacritics p/digits->diacritics))))))
462 |
463 | ;; TODO: disabled for now, re-enable when more intelligent (issue #37)
464 | ;english
465 | ;(assoc :english
466 | ; (filter-defs term english)))))
467 |
(defn sort-result
  "Sort the content of a dictionary look-up result.
  This sorts the result relative to the search term,
  e.g English word results are sorted according to relevance.

  Only the :english entries are sorted at the moment (highest relevance
  first, ties broken by :term); they end up in a sorted set."
  [result]
  (let [relevance  (memoize (partial english-relevance (:term result)))
        descending (comp - (safe-comparator relevance :term))
        english    (:english result)]
    ;; TODO: sort Pinyin properly too
    ;; (:pinyin, :pinyin+digits and :pinyin+diacritics are left as they are)
    (cond-> result
      english (assoc :english (apply sorted-set-by descending english)))))
489 |
--------------------------------------------------------------------------------
/src/sinostudy/dictionary/data.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.dictionary.data)
2 |
(def stopwords
  "A set of words that carry little meaning on their own: common English
  function words and contractions, abbreviations that are common throughout
  CC-CEDICT definitions, single digits and single letters."
  #{"a" "about" "above" "across" "after" "afterwards" "again" "against"
    "all" "almost" "alone" "along" "already" "also" "although" "always" "am"
    "among" "amongst" "amount" "an" "and" "another" "any" "anyhow"
    "anyone" "anything" "anyway" "anywhere" "are" "around" "as" "at" "back"
    "be" "became" "because" "become" "becomes" "becoming" "been" "before"
    "beforehand" "behind" "being" "below" "beside" "besides" "between"
    "beyond" "bill" "both" "bottom" "but" "by" "call" "can" "cannot" "cant" "co"
    "con" "could" "couldnt" "cry" "de" "describe" "detail" "do" "does" "done"
    "down" "due" "during" "each" "eg" "eight" "either" "eleven" "else"
    "elsewhere" "empty" "enough" "etc" "even" "ever" "every" "everyone"
    ;; "fifty" used to be misspelled as "fify" here.
    "everything" "everywhere" "except" "few" "fifteen" "fifty" "fill" "find"
    "fire" "first" "five" "for" "former" "formerly" "forty" "found" "four"
    "from" "front" "full" "further" "get" "give" "go" "had" "has" "hasnt" "have"
    "he" "hence" "her" "here" "hereafter" "hereby" "herein" "hereupon" "hers"
    "herself" "him" "himself" "his" "how" "however" "hundred" "i" "ie" "if" "in"
    "inc" "indeed" "interest" "into" "is" "it" "its" "itself" "keep" "last"
    "latter" "latterly" "least" "less" "ltd" "made" "many" "may" "me"
    "meanwhile" "might" "mill" "mine" "more" "moreover" "most" "mostly" "move"
    "much" "must" "my" "myself" "name" "namely" "neither" "never" "nevertheless"
    "next" "nine" "no" "nobody" "none" "noone" "nor" "not" "nothing" "now"
    "nowhere" "of" "off" "often" "on" "once" "one" "only" "onto" "or" "other"
    "others" "otherwise" "our" "ours" "ourselves" "out" "over" "own" "part"
    "per" "perhaps" "please" "put" "rather" "re" "same" "see" "seem" "seemed"
    "seeming" "seems" "serious" "several" "she" "should" "show" "side" "since"
    "sincere" "six" "sixty" "so" "some" "somehow" "someone" "something"
    "sometime" "sometimes" "somewhere" "still" "such" "system" "take" "ten"
    "than" "that" "the" "their" "them" "themselves" "then" "thence" "there"
    "thereafter" "thereby" "therefore" "therein" "thereupon" "these" "they"
    "thick" "thin" "third" "this" "those" "though" "three" "through"
    "throughout" "thru" "thus" "to" "together" "too" "top" "toward" "towards"
    "twelve" "twenty" "two" "un" "under" "until" "up" "upon" "us" "very" "via"
    "was" "we" "well" "were" "what" "whatever" "when" "whence" "whenever"
    "where" "whereafter" "whereas" "whereby" "wherein" "whereupon" "wherever"
    "whether" "which" "while" "whither" "who" "whoever" "whole" "whom" "whose"
    "why" "will" "with" "within" "without" "would" "yet" "you" "your" "yours"
    "yourself" "yourselves"

    ;; Common contractions with apostrophe (and a few without)
    ;; https://en.wikipedia.org/wiki/Wikipedia:List_of_English_contractions
    ;; NOTE(review): the entries starting with a capital I can only match if
    ;; words are not lower-cased before being checked — confirm against caller.
    "ain't" "aren't" "can't" "could've" "couldn't" "daren't" "daresn't"
    "dasn't" "didn't" "doesn't" "don't" "e'er" "everyone's" "finna" "gimme"
    "gonna" "gotta" "hadn't" "hasn't" "haven't" "he'd" "he'll" "he's" "he've"
    "how'd" "how'll" "how're" "how's" "I'd" "I'll" "I'm" "I'm'a" "I'm'o" "I've"
    "isn't" "it'd" "it'll" "it's" "let's" "ma'am" "mayn't" "may've" "mightn't"
    "might've" "mustn't" "mustn't've" "must've" "needn't" "ne'er" "o'clock"
    "o'er" "ol'" "oughtn't" "'s" "shan't" "she'd" "she'll" "she's" "should've"
    "shouldn't" "somebody's" "someone's" "something's" "that'll" "that're"
    "that's" "that'd" "there'd" "there'll" "there're" "there's" "these're"
    "they'd" "they'll" "they're" "they've" "this's" "those're" "'tis" "'twas"
    "wasn't" "we'd" "we'd've" "we'll" "we're" "we've" "weren't" "what'd"
    "what'll" "what're" "what's" "what've" "when's" "where'd" "where're"
    "where's" "where've" "which's" "who'd" "who'd've" "who'll" "who're" "who's"
    "who've" "why'd" "why're" "why's" "won't" "would've" "wouldn't" "y'all"
    "y'all'd've" "yesn't" "you'd" "you'll" "you're" "you've" "noun's"
    "noun(s)'re"

    ;; Special cases (common throughout CC-CEDICT definitions)
    "variant" "loanword" "cf" "lit" "tw" "pr" "abbr" "taiwan" "radical" "kangxi"
    "arch" "archaic" "...er" "written"

    ;; Numbers are excluded
    "1" "2" "3" "4" "5" "6" "7" "8" "9" "0"

    ;; The entire English alphabet is excluded ("a" and "i" found above;
    ;; repeating them here would make the set literal invalid)
    "b" "c" "d" "e" "f" "g" "h" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t"
    "u" "v" "w" "x" "y" "z"})
206 |
--------------------------------------------------------------------------------
/src/sinostudy/dictionary/embed.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.dictionary.embed
2 | (:require [clojure.string :as str]
3 | [sinostudy.pinyin.data :as pd]))
4 |
5 | ;;;; CC-CEDICT EMBEDDINGS
6 |
(def refr
  "A pattern used in CC-CEDICT to embed a hanzi reference with Pinyin,
  e.g. 好[hao3]: one or more characters that are not a space, comma, colon,
  opening bracket, ASCII letter or digit, directly followed by a bracketed
  section."
  #"[^ ,:\[a-zA-Z0-9]+\[[^\]]+\]+")
10 |
11 | ;; CLJS regex seems to have some issues with doing (str pp/hanzi-pattern),
12 | ;; so I've copied over whole implementation.
(def hanzi
  "A pattern used in CC-CEDICT to embed a hanzi reference (no Pinyin):
  a run of hanzi, an optional | and another run of hanzi.
  NOTE(review): both runs require at least one character, so a reference
  consisting of a single hanzi does not match — confirm this is intended."
  (let [chars (str/join (map str (vals pd/hanzi-unicode)))
        run   (str "[" chars "]+")]
    (re-pattern (str run "\\|?" run))))
17 |
(def pinyin
  "A pattern used in CC-CEDICT to embed a Pinyin pronunciation, e.g. [hao3]:
  ASCII letters, digits and spaces enclosed in square brackets."
  #"\[[a-zA-Z0-9 ]+\]+")
21 |
--------------------------------------------------------------------------------
/src/sinostudy/dictionary/load.clj:
--------------------------------------------------------------------------------
1 | (ns sinostudy.dictionary.load
2 | (:require [clojure.java.io :as io]
3 | [clojure.data.csv :as csv]
4 | [clojure.string :as str]
5 | [clj-json.core :as json]
6 | [sinostudy.dictionary.core :as d]
7 | [sinostudy.pinyin.core :as p]
8 | [sinostudy.pinyin.eval :as pe]))
9 |
10 | ;;;; CC-CEDICT
11 |
(defn- u:->umlaut
  "Replace the CC-CEDICT substitute u: with the proper Pinyin ü.
  Every occurrence in the string is replaced."
  [pinyin]
  (-> pinyin
      (str/replace "u:" "ü")))
16 |
(defn- join-abbr
  "Join the uppercase letters in a CC-CEDICT Pinyin string into blocks.
  A run of single uppercase letters separated by spaces (e.g. \"P K\") has its
  inner spaces removed; whatever terminated the run (a space or the end of
  the string) is kept."
  [pinyin]
  (str/replace pinyin
               #"([A-Z]( [A-Z])+)( |$)"
               (fn [[_ letters _ terminator]]
                 (str (str/replace letters " " "") terminator))))
23 |
(defn neutral-as-0
  "Convert the neutral tone digits (represented as 5 in CC-CEDICT) to 0.
  This ensures that the Pinyin strings are alphabetically sortable.
  Strings that are not recognised as Pinyin blocks with digits are returned
  unchanged."
  [s]
  (cond-> s
    (pe/pinyin-block+digits? s) (str/replace "5" "0")))
31 |
(defn split-defs
  "Split the CC-CEDICT definition string into separate, unique parts.
  The parts are separated by slashes; the result is a set."
  [definition]
  (into #{} (str/split definition #"/")))
36 |
(defn line->cedict-listing
  "Extract the constituents of a line in a CC-CEDICT dictionary file.
  Returns a map representation suitable for use as a dictionary entry,
  or nil when the line does not have the expected format:

    traditional simplified [pinyin] /definition/definition/.../

  The Pinyin has its neutral tones written as 0 and u: replaced by ü.
  Three look-up keys are derived from it: without digits, with digits and
  with diacritics."
  [line]
  (when-let [[_ trad simp pinyin defs]
             (re-matches #"^([^ ]+) ([^ ]+) \[([^]]+)\] /(.+)/" line)]
    (let [pinyin*   (-> pinyin neutral-as-0 u:->umlaut)
          no-digits (str/replace pinyin* #"\d" "")]
      {:traditional           trad
       :simplified            simp
       :pinyin                (join-abbr pinyin*)
       :pinyin-key            (d/pinyin-key no-digits)
       :pinyin+digits-key     (d/pinyin-key pinyin*)
       :pinyin+diacritics-key (d/pinyin-key (p/digits->diacritics pinyin*))
       :definitions           (split-defs defs)})))
52 |
(defn load-cedict
  "Load the listings of a CC-CEDICT dictionary file into Clojure maps.
  Lines starting with # are skipped. All lines are read before the file is
  closed, while the conversion into listings happens lazily afterwards.
  Note: a line that cannot be parsed shows up as nil in the result."
  [file]
  (with-open [reader (io/reader file)]
    (let [lines (doall (line-seq reader))]
      (->> lines
           (remove (fn [line] (str/starts-with? line "#")))
           (map line->cedict-listing)))))
60 |
61 |
62 | ;;;; WORD FREQUENCY
63 |
(defn line->freq-listing
  "Extract the constituents of a line in a word frequency file.
  The line must consist of three space-separated fields: the first is
  ignored, the second is parsed as the frequency and the third is the word.
  Returns a map with :frequency (a double) and :word, or nil when the line
  does not have this format."
  [line]
  (when-let [[_ _ freq word] (re-matches #"^([^ ]+) ([^ ]+) ([^ ]+)" line)]
    {:frequency (Double/parseDouble freq)
     :word      word}))
73 |
(defn normalise
  "Normalise the frequency of a freq-listing by dividing it by max-freq."
  [max-freq freq-listing]
  (update freq-listing :frequency / max-freq))
79 |
(defn load-freq-dict
  "Load the listings of 1 or more frequency files into a Clojure map
  going from word to normalised frequency.

  Only lines starting with a number followed by a space are considered.
  Frequencies are divided by the frequency of the first listing in the file
  (assumes the file is sorted with the most frequent word first — TODO
  confirm). When several files are given, the frequencies of words found in
  more than one file are averaged pairwise, file by file."
  ([file]
   (with-open [reader (io/reader file)]
     (let [raw-listings (->> (line-seq reader)
                             (doall)
                             (filter (fn [line] (re-find #"^\d+ " line)))
                             (keep line->freq-listing))
           max-freq     (:frequency (first raw-listings))]
       (into {}
             (map (juxt :word :frequency))
             (map (partial normalise max-freq) raw-listings)))))
  ([file & files]
   (let [mean (fn [a b] (/ (+ a b) 2))]
     (apply merge-with mean (load-freq-dict file) (map load-freq-dict files)))))
96 |
97 |
98 | ;;;; CHARACTER COMPOSITION, ETYMOLOGY, ETC.
99 |
(defn load-makemeahanzi
  "Load the listings of a makemeahanzi file into a Clojure map.
  Every line of the file is parsed as JSON; the resulting listings are keyed
  by their \"character\" value."
  [file]
  (with-open [reader (io/reader file)]
    (into {}
          (map (fn [line]
                 (let [listing (json/parse-string line)]
                   [(get listing "character") listing])))
          (line-seq reader))))
107 |
108 |
109 | ;;;; EXAMPLE SENTENCES + THEIR RELATIONS AND METADATA
(defn load-sentences
  "Load example sentences and their links from two tab-separated files.

  Only the first three columns of each sentence row are kept, and only rows
  whose second column is \"eng\" or \"cmn\". The links are limited to rows
  whose second column is the id (first column) of a \"cmn\" sentence.

  Currently returns just the counts of what was loaded, as a map with the
  keys :entries, :cmn-ids and :links."
  [sentences-file links-file]
  (with-open [sentences-reader (io/reader sentences-file)
              links-reader     (io/reader links-file)]
    (let [entries (->> (csv/read-csv sentences-reader :separator \tab :quote \^)
                       ;(take-nth 1000) ; TODO: remove
                       (map (partial take 3))
                       (filter (comp #{"eng" "cmn"} second))
                       (doall))

          ;; We only want to keep stuff around that is present in both eng/cmn.
          ;; The set must hold the ids themselves (not the whole rows),
          ;; otherwise no link can ever be matched against it below.
          cmn-ids (->> entries
                       (filter (comp #{"cmn"} second))
                       (map first)
                       (set))

          ;; It seems like the links in this dataset include both directions.
          links (->> (csv/read-csv links-reader :separator \tab)
                     (filter (comp cmn-ids second))
                     (doall))] ;TODO: first, second?
      {:entries (count entries)
       :cmn-ids (count cmn-ids)
       :links   (count links)})))
132 |
133 |
(defn load-test
  "Run load-sentences on the Tatoeba files expected to be located in
  ~/Code/sinostudy-data/tatoeba."
  []
  (let [tatoeba (str (System/getProperty "user.home") "/" "Code/sinostudy-data/"
                     "tatoeba/")]
    (load-sentences (str tatoeba "sentences_detailed.csv")
                    (str tatoeba "links.csv"))))
141 |
142 | ;;;; FULL DICTIONARY
143 |
(defn- in-home
  "Expands to the current user's home directory + s.
  The home directory is taken from the user.home system property and is
  joined to s with a slash."
  [s]
  (let [home (System/getProperty "user.home")]
    (str home "/" s)))
148 |
149 | ;; Note: dict compilation requires the sinostudy-data git repo to be located in:
150 | ;; ~/Code/sinostudy-data
(defn load-dict
  "Compile the full dictionary from the CC-CEDICT file, the two word
  frequency files and the makemeahanzi file found in ~/Code/sinostudy-data."
  []
  (let [data-file (fn [path] (in-home (str "Code/sinostudy-data/" path)))]
    (d/create-dict
      (load-cedict (data-file "cedict_ts.u8"))
      (load-freq-dict (data-file "frequency/internet-zh.num.txt")
                      (data-file "frequency/giga-zh.num.txt"))
      (load-makemeahanzi (data-file "makemeahanzi/dictionary.txt")))))
162 |
--------------------------------------------------------------------------------
/src/sinostudy/events/actions.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.events.actions
2 | "For all events relating to actions triggered through the text input field,
3 | including displaying and navigating the `action-chooser`."
4 | (:require [re-frame.core :as rf]
5 | [sinostudy.pinyin.core :as p]
6 | [sinostudy.cofx :as cofx]
7 | [sinostudy.fx :as fx]))
8 |
9 | ;; Only dispatched when the action-chooser is open.
;; Translates a key press into an action-chooser event:
;;   Escape        -> cancel (choose the special ::close-action-chooser action)
;;   Enter         -> choose the currently checked action
;;   1..n          -> choose the action with that number
;;   Right/Down    -> check the next action (wraps around to the first)
;;   Left/Up       -> check the previous action (wraps around to the last)
;; Any other key results in no effects.
;; The follow-up event is returned as a :dispatch effect rather than being
;; dispatched from inside the handler, which keeps the handler pure.
(rf/reg-event-fx
  ::on-key-down
  (fn [{:keys [db]} [_ key]]
    (let [{:keys [actions checked-action]} db
          last-index (dec (count actions))
          ;; NaN for non-numeric keys, which fails the int? check below.
          number     (js/parseInt key 10)
          choose     (fn [action] {:dispatch [::choose-action action]})
          check      (fn [n] {:dispatch [::check-action n]})]
      (cond
        (= "Escape" key)
        (choose [::close-action-chooser])

        (= "Enter" key)
        (choose (nth actions checked-action))

        (and (int? number)
             (<= 1 number (count actions)))
        (choose (nth actions (dec number)))

        ;; Starts from beginning when upper bound is crossed.
        (contains? #{"ArrowRight" "ArrowDown"} key)
        (check (if (< checked-action last-index)
                 (inc checked-action)
                 0))

        ;; Goes to last action when lower bound is crossed.
        (contains? #{"ArrowLeft" "ArrowUp"} key)
        (check (if (pos? checked-action)
                 (dec checked-action)
                 last-index))))))
44 |
;; Opens the action-chooser with the actions of the latest evaluation plus
;; the special cancel action; the first action starts out as the checked one.
(rf/reg-event-fx
  ::open-action-chooser
  [(rf/inject-cofx ::cofx/active-element)]
  (fn [cofx _]
    (let [db      (:db cofx)
          element (::cofx/active-element cofx)
          actions (-> db :evaluations first :actions)]
      ;; Firefox won't get keydown events without removing focus from the input
      {::fx/blur element
       :db       (assoc db
                        :checked-action 0
                        :actions (conj actions [::close-action-chooser]))})))
55 |
;; Closes the action-chooser by clearing the list of actions in the db.
(rf/reg-event-db
  ::close-action-chooser
  (fn [db _event]
    (assoc db :actions nil)))
60 |
;; Marks the action at index n as the checked one.
(rf/reg-event-db
  ::check-action
  (fn [db [_ n]]
    (-> db
        (assoc :checked-action n))))
65 |
66 | ;; Dispatched by user selecting an action in the action-chooser.
67 | ;; ::close-action-chooser (= cancel) is a special action (doesn't clear input).
;; Closes the action-chooser and then either hands focus back to the input
;; field (when cancelling) or dispatches the chosen action.
(rf/reg-event-fx
  ::choose-action
  (fn [_ [_ action]]
    (let [cancel    [::close-action-chooser]
          follow-up (if (= cancel action)
                      [::regain-input-focus]
                      action)]
      {:dispatch-n [cancel follow-up]})))
76 |
77 | ;; TODO: figure out a better way to regain focus for previously disabled field
;; Hands the element with id "input-field" (together with the value 100)
;; to the ::fx/set-focus effect.
(rf/reg-event-fx
  ::regain-input-focus
  (fn [_ _]
    (let [input-field (.getElementById js/document "input-field")]
      {::fx/set-focus [input-field 100]})))
82 |
;; Replaces the input with its tone digits converted to diacritics and
;; gives focus back to the input field.
(rf/reg-event-fx
  ::digits->diacritics
  (fn [{:keys [db]} [_ input]]
    (let [converted (p/digits->diacritics input)]
      {:db       (assoc db :input converted)
       :dispatch [::regain-input-focus]})))
88 |
;; Replaces the input with its tone diacritics converted to digits and
;; gives focus back to the input field.
(rf/reg-event-fx
  ::diacritics->digits
  (fn [{:keys [db]} [_ input]]
    (let [converted (p/diacritics->digits input)]
      {:db       (assoc db :input converted)
       :dispatch [::regain-input-focus]})))
94 |
--------------------------------------------------------------------------------
/src/sinostudy/events/core.cljs:
--------------------------------------------------------------------------------
(ns sinostudy.events.core
  "For miscellaneous events that do not have their own more specific namespace."
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [cljs.pprint :as pprint]
            [cljs.spec.alpha :as s]
            [re-frame.core :as rf]
            [ajax.core :as ajax]
            [cognitect.transit :as transit]
            [sinostudy.spec.dictionary :as sd]
            [sinostudy.db :as db]
            [sinostudy.pinyin.core :as p]
            [sinostudy.pinyin.eval :as pe]
            [sinostudy.dictionary.core :as d]
            [sinostudy.navigation.pages :as pages]
            [sinostudy.events.scrolling :as scrolling]
            [sinostudy.events.actions :as actions]
            [sinostudy.cofx :as cofx]
            [sinostudy.fx :as fx]))
19 |
20 | ;; all responses from the Compojure backend are Transit-encoded
(def transit-reader
  "A Transit reader for the JSON format."
  (transit/reader :json))
23 |
(defn available-actions
  "Evaluate a query string to get a vector of possible actions.

  Each action is an event vector. A query consisting of hanzi only has a
  single look-up action. A query starting with a word character can be
  looked up as is and additionally:
    * looked up in its pinyin-key form, if it is a Pinyin block and writing
      it with ü changed it
    * converted between tone digits and diacritics, if it contains either.
  Any other query results in nil."
  [query]
  (let [query*  (p/with-umlaut query)
        look-up [::look-up query*]
        block?  (or (pe/pinyin-block? query*)
                    (pe/pinyin-block+digits? query*)
                    (pe/pinyin-block+diacritics? query*))
        ;; Pinyin without any tone marking has nothing to convert.
        plain?  (pe/pinyin+punct? query*)]
    (cond
      (pe/hanzi-block? query*)
      [look-up]

      (re-find #"^\w" query*)
      (cond-> [look-up]

        (and block?
             (not= query query*))
        (conj [::look-up (d/pinyin-key query*)])

        ;; The conversion events are registered in the actions namespace,
        ;; so they must be qualified with its alias to reach a handler.
        (and (not plain?)
             (pe/pinyin+digits+punct? query*))
        (conj [::actions/digits->diacritics query*])

        (and (not plain?)
             (pe/pinyin+diacritics+punct? query*))
        (conj [::actions/diacritics->digits query*])))))
51 |
(defn- cache-search-result-entries
  "Save the individual entries of a dictionary search result in the db.
  Note: this is a separate step from saving the search result itself!

  The entries found under :english, :pinyin, :pinyin+diacritics and
  :pinyin+digits in content are each stored as a terms page of their own,
  using the :term of the entry as the page id."
  [db content]
  (let [entry-ks #{:english
                   :pinyin
                   :pinyin+diacritics
                   :pinyin+digits}
        entries  (apply set/union (vals (select-keys content entry-ks)))]
    (reduce (fn [db* entry]
              (assoc-in db* [:pages ::pages/terms (:term entry)] entry))
            db
            entries)))
67 |
(defn mk-input
  "What the input field should display based on a given page.
  For a terms page this is the id of the page, unless the id consists of
  hanzi only. In all other cases nil is returned."
  [[category id :as page]]
  (when (and (= ::pages/terms category)
             (not (pe/hanzi-block? id)))
    id))
73 |
74 | ;;;; MISCELLANEOUS
75 |
;; Replaces whatever is in the app db with the initial db.
(rf/reg-event-db
  ::initialize-db
  (fn [_db _event]
    db/initial-db))
80 |
;; Stores the given decomposition under :decomposed.
(rf/reg-event-db
  ::decompose-char
  (fn [db [_ decomposition]]
    (-> db
        (assoc :decomposed decomposition))))
85 |
;; Stores the given script under :script.
(rf/reg-event-db
  ::change-script
  (fn [db [_ script]]
    (-> db
        (assoc :script script))))
90 |
;; Remembers which result type is used as the filter for a given term.
(rf/reg-event-db
  ::set-result-filter
  (fn [db [_ term type]]
    (update db :result-filters assoc term type)))
95 |
;; Passes the injected active element on to the ::fx/blur effect.
(rf/reg-event-fx
  ::blur-active-element
  [(rf/inject-cofx ::cofx/active-element)]
  (fn [cofx _]
    {::fx/blur (::cofx/active-element cofx)}))
101 |
102 |
103 | ;;;; EVALUATION
104 |
;; Adds an evaluation (the query, its available actions and a timestamp)
;; to the evaluations in the db.
(rf/reg-event-fx
  ::save-evaluation
  [(rf/inject-cofx ::cofx/now)]
  (fn [{:keys [db ::cofx/now]} [_ query actions]]
    (let [evaluation {:query     query
                      :actions   actions
                      :timestamp now}]
      {:db (update db :evaluations conj evaluation)})))
112 |
;; Only evaluates the latest input (no change while still writing).
;; This improves performance when coupled with delayed dispatching.
;; It also doesn't evaluate the same query twice in a row!
;; Saves an evaluation of the (trimmed) input, provided that the input is
;; still what the input field contains and that the query differs from the
;; latest evaluated one. When the first available action is a look-up,
;; that look-up is dispatched right away too.
(rf/reg-event-fx
  ::evaluate-input
  (fn [{:keys [db]} [_ input]]
    (let [query     (str/trim input)
          actions   (available-actions query)
          primary   (first actions)
          outdated? (not= input (:input db))
          repeated? (= query (:query (first (:evaluations db))))]
      (when-not (or outdated? repeated?)
        {:dispatch-n [[::save-evaluation query actions]
                      (when (= ::look-up (first primary))
                        primary)]}))))
129 |
130 | ;; Dispatched every time the input field changes.
131 | ;; For performance reasons, non-blank queries are evaluated with a short lag
132 | ;; while blank queries are dispatched immediately for evaluation.
;; Immediate evaluation for blank input will override queued queries.
;; This prevents any hint-changing misfires after clearing the input;
;; otherwise, a queued query could modify the UI shortly after.
;; Stores the new input in the db and schedules its evaluation:
;; immediately for blank input, otherwise after the configured delay.
(rf/reg-event-fx
  ::on-input-change
  (fn [{:keys [db]} [_ input]]
    (let [evaluate [::evaluate-input input]
          db*      (assoc db :input input)]
      (if (str/blank? input)
        {:db       db*
         :dispatch evaluate}
        {:db             db*
         :dispatch-later [{:dispatch evaluate
                           :ms       (get-in db [:config :evaluation :delay])}]}))))
145 |
146 |
147 | ;;;; CHANGING LOCATION & LOADING PAGE CONTENT
148 |
149 | ;;; Force an evaluation for the latest input if it hasn't been evaluated yet.
;; Submitting the input saves a new evaluation if the query has not been
;; evaluated yet. After that, a single available action is dispatched
;; directly (and the active element is blurred), while several available
;; actions open the action-chooser. No actions means nothing more happens.
(rf/reg-event-fx
  ::submit
  (fn [{:keys [db]} [_ input]]
    (let [query       (str/trim input)
          latest-eval (first (:evaluations db))
          new-query?  (not= query (:query latest-eval))
          actions     (if new-query?
                        (available-actions query)
                        (:actions latest-eval))
          n           (count actions)
          events      (cond-> []
                        new-query? (conj [::save-evaluation query actions])
                        (= n 1)    (into (conj actions [::blur-active-element]))
                        (> n 1)    (conj [::actions/open-action-chooser]))]
      {:dispatch-n events})))
164 |
165 | ;; Pages are loaded on-demand from either the frontend db or (if N/A) by sending
166 | ;; a request to the backend. Currently, only dictionary pages are supported.
;; Only terms pages are handled. A page is requested from the backend when
;; it is neither stored in the frontend db nor registered as unknown;
;; otherwise the location is updated directly.
(rf/reg-event-fx
  ::load-page
  (fn [{:keys [db]} [_ [category id :as page]]]
    (let [{:keys [unknown-queries pages]} db
          ;; Unknown queries are registered by id (see ::register-unknown-query),
          ;; so the id is what needs to be checked here, not the whole page.
          unknown? (contains? unknown-queries id)]
      (when (= category ::pages/terms)
        (if (or unknown?
                (get-in pages page))
          {:dispatch [::update-location page]}
          {:dispatch [::request page]})))))
176 |
;; Adds the page to the queue, with the current time attached as metadata.
(rf/reg-event-fx
  ::enqueue
  [(rf/inject-cofx ::cofx/now)]
  (fn [{:keys [db ::cofx/now]} [_ page]]
    (let [stamped (with-meta page {:ts now})]
      {:db (update db :queue conj stamped)})))
182 |
;; Removes the page from the queue.
(rf/reg-event-db
  ::dequeue
  (fn [db [_ page]]
    (-> db
        (update :queue disj page))))
187 |
188 | ;; If a page doesn't exist in the frontend db, the backend will be contacted
189 | ;; through an Ajax request. While the request is underway, the requested page
190 | ;; is put in a queue. While requests are enqueued, they will not be retried.
191 | ;; Once a request for a page has been fulfilled or failed, the page will be
192 | ;; dequeued once again, allowing for new requests to be sent.
;; Sends a GET request for the page to <query-uri>/<category>/<id>, unless
;; the page is already in the queue, and enqueues the page.
(rf/reg-event-fx
  ::request
  (fn [{:keys [db]} [_ [category id :as page]]]
    (let [uri (str (:query-uri db) "/" (name category) "/" id)]
      ;; The queue is part of the db (see ::enqueue), not of the coeffects.
      (when-not (contains? (:queue db) page)
        {:dispatch   [::enqueue page]
         :http-xhrio {:method          :get
                      :timeout         5000
                      :response-format (ajax/text-response-format)
                      :on-success      [::on-request-success]
                      ;; The page is passed along since a failure response
                      ;; cannot be relied on to say which page it was for.
                      :on-failure      [::on-request-failure page]
                      :uri             uri}}))))

;; Decodes the Transit response, logs it as a successful query, dequeues the
;; page that the response is for and has the content saved.
(rf/reg-event-fx
  ::on-request-success
  [(rf/inject-cofx ::cofx/now)]
  (fn [{:keys [db ::cofx/now]} [_ result]]
    (let [content (transit/read transit-reader result)
          page    (:page content)]
      {:db         (update db :queries conj {:state     :success
                                             :content   content
                                             :timestamp now})
       :dispatch-n [[::dequeue page]
                    [::save-page content]]})))

;; Logs the failed query and dequeues the page, so that a new request for
;; the page can be sent later on.
(rf/reg-event-fx
  ::on-request-failure
  [(rf/inject-cofx ::cofx/now)]
  (fn [{:keys [db ::cofx/now]} [_ page result]]
    {:db       (update db :queries conj {:state     :failure
                                         :content   result
                                         :timestamp now})
     :dispatch [::dequeue page]}))
225 |
;; A successful request to the backend leads to the retrieved page being saved
;; in the frontend db. In cases where a term does not have an associated page,
;; it is registered as unknown to prevent further retrieval attempts.
;; A page without a result has its id registered as an unknown query, while
;; the result of a terms page is saved. The location is updated either way.
(rf/reg-event-fx
  ::save-page
  (fn [_ [_ {:keys [page result]}]]
    (let [[category id] page
          save-event    (cond
                          (nil? result)              [::register-unknown-query id]
                          (= category ::pages/terms) [::save-term page result])]
      {:dispatch-n [save-event
                    [::update-location page]]})))
237 |
;; Adds the term to the unknown queries in the db.
(rf/reg-event-db
  ::register-unknown-query
  (fn [db [_ term]]
    (-> db
        (update :unknown-queries conj term))))
242 |
243 | ;; Store result directly and then store individual entries.
244 | ;; TODO: reduce overwrites for hanzi result?
;; The search result is validated against its spec first. An invalid result
;; is reported in the console and leaves the db unchanged.
(rf/reg-event-db
  ::save-term
  (fn [db [_ [category id :as page] search-result]]
    (if-let [err (s/explain-data ::sd/search-result search-result)]
      ;; pprint comes from the cljs.pprint namespace required by this file;
      ;; referring to it without a require is not guaranteed to work.
      (do (js/console.error (with-out-str (pprint/pprint err)))
          db)
      (-> db
          ;; Save the actual search result or dictionary entry in the db.
          (assoc-in [:pages category id] (-> search-result
                                             (d/reduce-result)
                                             (d/sort-result)))

          ;; Cache incidental, referenced entries for faster page rendering times.
          (cache-search-result-entries search-result)))))
259 |
260 | ;; Dispatched either directly by ::load-page or indirectly through a successful
261 | ;; backend request. This ensures that the address bar is only updated when
262 | ;; content actually exists.
263 | (rf/reg-event-fx
264 |  ::update-location
265 |  [(rf/inject-cofx ::cofx/pathname)]
266 |  (fn [{db :db, pathname ::cofx/pathname} [_ [_ id :as page]]]
267 |    ;; Skip navigation for stale input, unknown queries or an unchanged pathname.
268 |    (when-not (or (not= (:input db) id)
269 |                  (contains? (:unknown-queries db) id)
270 |                  (pages/equivalent? pathname page))
271 |      {::fx/navigate-to (pages/page->pathname page)})))
272 |
273 | (rf/reg-event-fx
274 |  ::change-location
275 |  [(rf/inject-cofx ::cofx/now)
276 |   (rf/inject-cofx ::cofx/scroll-state)]
277 |  ;; Adds new-page to :history, fills :input if unset, saves scroll state, loads the page.
278 |  (fn [{db :db, now ::cofx/now, scroll-state ::cofx/scroll-state} [_ new-page]]
279 |    (let [previous-page (first (:history db)) ; first history item before the update
280 |          input         (or (:input db) (mk-input new-page))]
281 |      {:db         (-> db
282 |                       (update :history conj (with-meta new-page now))
283 |                       (assoc :input input))
284 |       :dispatch-n [[::scrolling/save-scroll-state previous-page scroll-state]
285 |                    [::load-page (pages/shortened new-page)]]})))
286 |
287 | (rf/reg-event-fx
288 | ::look-up
289 | (fn [_ [_ term]]
290 | {:dispatch [::load-page [::pages/terms term]]})) ; looks term up by loading its ::pages/terms page
291 |
--------------------------------------------------------------------------------
/src/sinostudy/events/scrolling.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.events.scrolling
2 | "For all events relating to preservation of scroll state when navigating
3 | the browser history."
4 | (:require [re-frame.core :as rf]
5 | [sinostudy.fx :as fx]))
6 |
7 | (rf/reg-event-db
8 |  ::save-scroll-state
9 |  ;; Stores scroll-state for page; an empty or nil scroll-state leaves the db
10 |  ;; unchanged.
11 |  (fn [db [_ page scroll-state]]
12 |    (cond-> db (seq scroll-state) (assoc-in [:scroll-states page] scroll-state))))
13 |
14 | (rf/reg-event-db
15 |  ::reset-scroll-state
16 |  ;; Forgets any scroll state stored for page.
17 |  (fn [db [_ page]] (update-in db [:scroll-states] dissoc page)))
18 |
19 | (rf/reg-event-fx
20 |  ::load-scroll-state
21 |  ;; Hands the scroll state stored for page (nil if none) to ::fx/set-scroll-state.
22 |  (fn [{db :db} [_ page]] {::fx/set-scroll-state (-> db :scroll-states (get page))}))
23 |
--------------------------------------------------------------------------------
/src/sinostudy/fx.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.fx
2 | (:require [re-frame.core :as rf]
3 | [accountant.core :as accountant]))
4 |
5 | ;; Dispatched by actions that need to change the page (and browser history).
6 | (rf/reg-fx
7 | ::navigate-to
8 | (fn [path]
9 | (accountant/navigate! path))) ; path is passed on to accountant unchanged
10 |
11 | ;; Dispatched by ::close-action-chooser.
12 | ;; This is definitely a less than optimal solution...
13 | ;; Focuses element after delay ms; a missing element is ignored (like ::blur).
14 | (rf/reg-fx
15 |  ::set-focus
16 |  (fn [[element delay]]
17 |    (when element
18 |      (js/setTimeout #(.focus element) delay))))
19 |
20 | ;; Dispatched by ::close-action-chooser.
21 | ;; This is definitely a less than optimal solution...
22 | (rf/reg-fx
23 | ::blur
24 | (fn [element]
25 | (when element ; a nil element is silently ignored
26 | (.blur element))))
27 |
28 | ;; Dispatched by ::load-scroll-state. Selectors that match no element are skipped.
29 | (rf/reg-fx
30 |  ::set-scroll-state
31 |  (fn [scroll-state]
32 |    (doseq [[css-selector [x y]] scroll-state]
33 |      ;; querySelector returns the first match or null, so a stale selector no longer throws.
34 |      (when-let [element (js/document.querySelector css-selector)]
35 |        (set! (.-scrollLeft element) x)
36 |        (set! (.-scrollTop element) y)))))
36 |
--------------------------------------------------------------------------------
/src/sinostudy/macros/core.clj:
--------------------------------------------------------------------------------
1 | (ns sinostudy.macros.core
2 | (:refer-clojure :exclude [slurp]))
3 |
4 | (defmacro slurp [file]
5 | (clojure.core/slurp file)) ; runs at macro-expansion time, so the call expands to the file's contents
6 |
--------------------------------------------------------------------------------
/src/sinostudy/navigation/handler.clj:
--------------------------------------------------------------------------------
1 | (ns sinostudy.navigation.handler
2 | (:import [java.io ByteArrayOutputStream])
3 | (:require [clojure.java.io :as io]
4 | [clojure.string :as str]
5 | [clojure.tools.reader :as reader]
6 | [compojure.core :refer :all]
7 | [compojure.route :as route]
8 | [ring.middleware.defaults :refer [wrap-defaults site-defaults]]
9 | [cognitect.transit :as transit]
10 | [org.httpkit.server :as hs]
11 | [mount.core :as mount :refer [defstate]]
12 | [mount-up.core :as mount-up]
13 | [sinostudy.navigation.pages :as pages]
14 | [sinostudy.dictionary.load :as dl]
15 | [sinostudy.dictionary.core :as d])
16 | (:gen-class))
17 |
18 | ;; TODO: split into dev/production
19 | ;; https://github.com/JulianBirch/cljs-ajax/blob/master/docs/server.md#cross-origin-requests
20 |
21 | ;; TODO: use coercions for regex check of input
22 | ;; https://weavejester.github.io/compojure/compojure.coercions.html
23 |
24 | (defstate config
25 | "System config file (EDN format)."
26 | :start (-> "config.edn" io/resource slurp reader/read-string)) ; read from the classpath resource
27 |
28 | (defstate dict
29 | "Dictionary used for Chinese/English/pinyin term look-ups."
30 | :start (dl/load-dict))
31 |
32 | (def index
33 | (slurp (io/resource "public/index.html"))) ; read once, when this namespace is loaded
34 |
35 | ;; The first Access-Control header permits cross-origin requests.
36 | ;; The second prevents Chrome from stripping the Content-Type header.
37 | (def ajax-headers
38 | {"Access-Control-Allow-Origin" "*"
39 | "Access-Control-Allow-Headers" "Content-Type"
40 | "Content-Type" "application/transit+json; charset=utf-8"})
41 |
42 | (defn transit-write
43 |   "Encode Clojure data x as a Transit JSON string (adapted from David Nolen's example)."
44 |   [x]
45 |   (let [baos (ByteArrayOutputStream.)
46 |         w    (transit/writer baos :json)]
47 |     (transit/write w x)
48 |     ;; Decode explicitly as UTF-8 (matching the charset in ajax-headers) instead of the platform default.
49 |     (.toString baos "UTF-8")))
50 |
51 | (defn ns-keywords
52 |   "Split the string s on the delimiter re and return the pieces as a set of
53 |   keywords in the namespace ns. Any s that is not a string is returned as is."
54 |   [re ns s]
55 |   (if-not (string? s)
56 |     s
57 |     (let [prefix (str ns)]
58 |       (into #{} (map #(keyword prefix %)) (str/split s re)))))
59 |
60 | (defn execute-query
61 |   "Execute a query from the ClojureScript app. Only ::pages/terms queries are
62 |   handled (as a dictionary look-up); any other type yields nil. opts is unused."
63 |   [type query opts]
64 |   (when (= ::pages/terms type)
65 |     (d/look-up dict query)))
66 |
67 | (defn transit-result
68 |   "Get the Transit-encoded result of a query, wrapped together with its page."
69 |   [type query opts]
70 |   (let [result (execute-query type query opts)]
71 |     (transit-write {:page [type query], :result result})))
72 |
73 | (defroutes app-routes
74 | ;; ANY rather than GET is necessary to allow cross origin requests during dev.
75 | (ANY "/query/:type/:query" [type query & opts]
76 | {:status 200
77 | :headers ajax-headers
78 | :body (transit-result (keyword (str 'sinostudy.navigation.pages) type) ; e.g. "terms" becomes ::pages/terms
79 | query
80 | opts)})
81 |
82 | ;; HTML page requests all resolve to the ClojureScript app.
83 | ;; The internal routing of the app creates the correct presentation.
84 | (ANY "*" [] index))
85 |
86 | ;; Allows web resources in the JAR (such as CSS and JS) to be fetched.
87 | ;; This is especially important in production, i.e. using http-kit.
88 | ;; Otherwise, the paths referencing them in index.html will return nothing.
89 | (defroutes resources-routes
90 | (route/resources "/" {:root "public"}))
91 |
92 | (def all-routes
93 | (routes resources-routes ; listed before app-routes, whose "*" route would otherwise catch everything
94 | app-routes))
95 |
96 | (def app
97 | (wrap-defaults all-routes site-defaults))
98 |
99 | (defstate server
100 | "Server instance (http-kit)."
101 | :start (hs/run-server #'app {:port (get-in config [:server :port :internal] 8080)}) ; falls back to port 8080
102 | :stop (server)) ; presumably run-server returns a stop function, which is called here — confirm against http-kit docs
103 |
104 | (defn -main
105 |   "Entry point: starts all mount states. Command-line arguments are accepted but ignored."
106 |   [& _args]
107 |   (mount-up/on-upndown :info mount-up/log :before)
108 |   (mount/start)
109 |   (println (str "Listening on port " (get-in config [:server :port :internal] 8080))))
109 |
--------------------------------------------------------------------------------
/src/sinostudy/navigation/pages.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.navigation.pages
2 | "This namespace contains functions related to the page abstraction used in
3 | sino.study, as well as serving as a namespaced keyword prefix for the various
4 | page categories in use, e.g. ::pages/terms."
5 | (:require [clojure.string :as str]))
6 |
7 | (defn shortened
8 |   "Trim a page down to its first 2 items (category and id). Any additional
9 |   items only affect how the page is displayed, not which data it refers to.
10 |   Returns nil for a missing page or one with fewer than 2 items."
11 |   [page]
12 |   (let [has-id? (and page (< 1 (count page)))]
13 |     (when has-id? (subvec page 0 2))))
14 |
15 | (defn page->pathname
16 |   "Convert a page to a window.location.pathname."
17 |   [page]
18 |   (->> page (map name) (str/join "/") (str "/")))
19 |
20 | (defn equivalent?
21 | "Is the window.location.pathname equivalent to the given page?"
22 | [pathname page]
23 | (= pathname (page->pathname page))) ; exact string comparison with the pathname generated for page
24 |
--------------------------------------------------------------------------------
/src/sinostudy/navigation/routes.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.navigation.routes
2 | (:require-macros [secretary.core :refer [defroute]])
3 | (:import goog.History)
4 | (:require [secretary.core :as secretary]
5 | [re-frame.core :as rf]
6 | [sinostudy.events.core :as events]
7 | [sinostudy.navigation.pages :as pages]
8 | [accountant.core :as accountant]))
9 |
10 | ;; Since scroll restoration differs in implementation between e.g. Firefox
11 | ;; and Chrome -- and neither implementation is good enough -- the safest
12 | ;; choice is to explicitly disable scroll restoration (default: "auto").
13 | (when (exists? js/window.history.scrollRestoration)
14 | (set! js/window.history.scrollRestoration "manual"))
15 |
16 | (defn app-routes
17 |   "Register the client-side routes; every route dispatches ::events/change-location."
18 |   []
19 |   ;; The hash prefix is for older browsers (IE 9); it may be unnecessary and may impede other functionality.
20 |   (secretary/set-config! :prefix "#")
21 |
22 |   ;; Combining the root route with the other page routes doesn't seem to work.
23 |   (defroute "/" []
24 |     (rf/dispatch [::events/change-location [::pages/static "/"]]))
25 |
26 |   (defroute "/:page" [page]
27 |     (rf/dispatch [::events/change-location [::pages/static (str "/" page)]]))
28 |
29 |   (defroute
30 |     (str "/" (name ::pages/terms) "/:term") [term]
31 |     (rf/dispatch [::events/change-location [::pages/terms term]]))
32 |
33 |   (defroute
34 |     (str "/" (name ::pages/terms) "/:term/:attribute") [term attribute]
35 |     (rf/dispatch [::events/change-location [::pages/terms term attribute]]))
36 |
37 |   (defroute
38 |     "*" []
39 |     (rf/dispatch [::events/change-location [::pages/static "/404"]])))
40 |
41 | ;; Following instructions from: https://github.com/venantius/accountant
42 | (accountant/configure-navigation!
43 | {:nav-handler (fn [path] (secretary/dispatch! path)) ; every navigation is dispatched to the secretary routes
44 | :path-exists? (fn [path] (secretary/locate-route path))})
45 |
--------------------------------------------------------------------------------
/src/sinostudy/pinyin/core.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.pinyin.core
2 | (:require [clojure.string :as str]
3 | [sinostudy.rim.core :as rim]
4 | [sinostudy.pinyin.patterns :as patterns]
5 | [sinostudy.pinyin.data :as data]))
6 |
7 | (defn parse-int
8 |   "Parses a string s into an integer (always read as base 10)."
9 |   [s]
10 |   #?(:clj  (Integer/parseInt s)
11 |      :cljs (js/parseInt s 10)))
12 |
13 | (defn with-umlaut
14 |   "Replace the common substitute letter V in s with the proper Pinyin Ü."
15 |   [s]
16 |   (reduce (fn [text [substitute umlaut]] (str/replace text substitute umlaut))
17 |           s
18 |           [[\v \ü] [\V \Ü]]))
19 |
20 | (defn with-diacritic
21 |   "Get the diacriticised char based on Pinyin tone (0 through 5)."
22 |   [char tone]
23 |   (-> char data/diacritics (nth tone)))
24 |
25 | ;; derived from this guideline: http://www.pinyin.info/rules/where.html
26 | (defn diacritic-index
27 |   "Get the index in s where a diacritic should be put according to Pinyin rules;
28 |   s is a Pinyin syllable with/without an affixed digit (e.g. wang2 or lao)."
29 |   [s]
30 |   (when-let [s* (and (string? s) (re-find #"[^\d]+" (str/lower-case s)))]
31 |     (cond
32 |       (str/includes? s* "a") (str/index-of s* "a")
33 |       (str/includes? s* "e") (str/index-of s* "e")
34 |       (str/includes? s* "ou") (str/index-of s* "o")
35 |       ;; Otherwise mark the last vowel (e.g. ni -> i, niu -> u, xiong -> o); a
36 |       ;; leading n must not be mistaken for a final n/ng.
37 |       :else (if-let [vowels (seq (keep #(str/last-index-of s* %) ["i" "o" "u" "ü" "v"]))]
38 |               (apply max vowels)
39 |               (dec (count s*))))))
40 |
41 | (defn handle-m
42 |   "Handle the super rare, special case final, m: s is m or M followed by a
43 |   tone digit."
44 |   [s]
45 |   (let [offset (if (= \M (first s)) 6 0)] ; upper-case forms start at index 6
46 |     (->> s last str parse-int (+ offset) (nth data/m-diacritics))))
47 |
48 | (defn digit->diacritic
49 |   "Convert a Pinyin syllable/final s with an affixed tone digit into one with a
50 |   tone diacritic. When converting more than a single syllable at a time,
51 |   use digits->diacritics instead!"
52 |   [s]
53 |   (cond
54 |     (empty? s) s ; also covers nil
55 |     (re-matches #"[mM]\d" s) (handle-m s)
56 |     :else (let [digit-index (dec (count s))
57 |                 tone        (parse-int (str (nth s digit-index)))
58 |                 bare        (subs s 0 digit-index)
59 |                 target      (nth s (diacritic-index s))]
60 |             (str/replace bare target (with-diacritic target tone)))))
61 |
62 | ;; used by diacritic-string to find the bounds of the last Pinyin final
63 | (defn- last-final
64 |   "Take a string with a single affixed tone digit as input and returns the
65 |   longest allowed Pinyin final + the digit. The Pinyin final that is returned
66 |   is the one immediately before the digit, i.e. the last final.
67 |   Returns nil when no known final precedes the digit."
68 |   [s]
69 |   (let [end   (dec (count s))       ; index of the affixed digit
70 |         start (- end (min end 4))]  ; look at no more than 4 letters before it
71 |     ;; Drop leading characters until what remains is a known final.
72 |     (loop [candidate (subs s start end)]
73 |       (cond
74 |         (empty? candidate) nil
75 |         (contains? data/finals (str/lower-case candidate)) (str candidate (last s))
76 |         :else (recur (subs candidate 1))))))
77 |
78 | (defn- handle-r
79 | "Handle the common special case final, r."
80 | [s]
81 | (str/replace s #"\d" "")) ; r takes no diacritic, so the digits are simply removed
82 |
83 | ;; used by digits->diacritics to convert tone digits into diacritics
84 | (defn- diacritic-string
85 |   "Take a string with a single affixed tone digit as input and substitutes the
86 |   digit with a tone diacritic. The diacritic is placed in the Pinyin final
87 |   immediately before tone digit."
88 |   [s]
89 |   (let [r-only? (contains? #{"r5" "R5" "r0" "R0"} (str/trim s))]
90 |     (if r-only?
91 |       (handle-r s)
92 |       (let [final         (last-final s)
93 |             prefix-length (- (count s) (count final))]
94 |         ;; prefix = preceding neutral tone syllables + the initial
95 |         (str (subs s 0 prefix-length) (digit->diacritic final))))))
96 |
97 | (defn digits->diacritics
98 | "Convert a Pinyin string s with one or several tone digits into a string with
99 | tone diacritics. The digits 0, 1, 2, 3, 4, and 5 can be used as tone markers
100 | behind any Pinyin final in the block. Postfixing 0 or 5 (or nothing) will
101 | result in no diacritic being added, i.e. marking a neutral tone. When the
102 | option :v-as-umlaut? is true (default false), any V is first converted to Ü."
103 | [s & {:keys [v-as-umlaut?] :or {v-as-umlaut? false}}]
104 | (if (not (string? s))
105 | s
106 | (let [s* (if v-as-umlaut? (with-umlaut s) s)
107 | digit-strings (re-seq #"[^\d]+\d" s*) ; each run of non-digits together with the digit that ends it
108 | diacritic-strings (map diacritic-string digit-strings)
109 | suffix (re-seq #"[^\d]+$" s*)] ; trailing text without a digit is kept unchanged
110 | (apply str (concat diacritic-strings suffix)))))
111 |
112 | ;; used by the pinyin+diacritics? (allows for evaluation as plain Pinyin)
113 | (defn no-diacritics
114 |   "Replace those characters in the input string s that have Pinyin diacritics
115 |   with standard characters."
116 |   ([s] (no-diacritics s data/diacritic-patterns))
117 |   ([s patterns]
118 |    (reduce (fn [text [replacement match]]
119 |              (if (nil? match) (reduced text) (str/replace text match replacement)))
120 |            s patterns)))
121 |
122 | (defn no-digits
123 | "Remove digits from the input string."
124 | [s]
125 | (str/replace s #"[0-9]" "")) ; only the ASCII digits 0-9 are removed
126 |
127 | (defn- char->tone
128 |   "Get the tone (0-4) based on a char."
129 |   [char]
130 |   (let [matches? #(re-matches (get data/tone-diacritics %) char)]
131 |     (if (nil? char)
132 |       0
133 |       ;; first tone (1-4) whose diacritic pattern matches, else the neutral tone
134 |       (or (first (filter matches? [1 2 3 4])) 0))))
135 |
136 | (defn- replace-at
137 | "Like clojure.string/replace, but replaces between index from and to (excl)."
138 | [s from to replacement]
139 | (str (subs s 0 from) replacement (subs s to))) ; text before from + replacement + text from to onwards
140 |
141 | (defn- diacritics->digits*
142 |   "Replaces in s based on a replacements vector of [from syllable tone] items."
143 |   [s replacements]
144 |   (first
145 |     (reduce (fn [[text skip :as state] [from syllable tone :as replacement]]
146 |               (if-not replacement
147 |                 (reduced state)
148 |                 (let [start (+ skip from)
149 |                       end   (+ start (count syllable))]
150 |                   ;; every inserted tone digit shifts the later indexes by one
151 |                   [(str (subs text 0 start) syllable tone (subs text end))
152 |                    (if tone (inc skip) skip)])))
153 |             [s 0]
154 |             replacements)))
155 |
156 | (defn diacritics->digits
157 |   "Convert a Pinyin string s with tone diacritics into one with tone digits."
158 |   [s]
159 |   (let [s* (no-diacritics s)
160 |         syllables (sort-by first (rim/re-pos patterns/pinyin-syllable s*)) ; a map has no guaranteed order, but digits must be inserted left to right
161 |         original #(subs s (first %) (+ (first %) (count (second %)))) ; the same span in the text that still has diacritics
162 |         diacritic #(re-find #"[^\w]" %)
163 |         tone (comp #(if (= 0 %) nil %) char->tone diacritic original)] ; nil (no digit) for neutral tone
164 |     (diacritics->digits* s (map (juxt first second tone) syllables))))
165 |
--------------------------------------------------------------------------------
/src/sinostudy/pinyin/data.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.pinyin.data)
2 |
3 | ;; also includes special case initials w and y (technically not initials)
4 | (def initials
5 | #{"b" "p" "m" "f" "d" "t" "n" "l"
6 | "g" "k" "h" "j" "q" "x" "z" "c"
7 | "s" "zh" "ch" "sh" "r" "w" "y"})
8 |
9 | ;; includes all possible forms in use (e.g. "ue" as shorthand for "üe")
10 | ;; r is a common special case final (technically not a final)
11 | ;; m is a super rare, special case final
12 | (def finals
13 | #{"a" "ai" "an" "ang" "ao"
14 | "e" "ei" "en" "eng" "er"
15 | "i" "ia" "ian" "iang" "iao" "ie" "in" "ing" "iong" "iu"
16 | "m"
17 | "o" "ong" "ou"
18 | "r"
19 | "u" "ua" "uai" "uan" "uang" "ue" "ui" "un" "uo"
20 | "ü" "üe"})
21 |
22 | ;; the index of a character corresponds to the tone present at that index
23 | ;; indexes 0 and 5 both represent neutral tone (= no diacritics)
24 | (def diacritics
25 | {\a "aāáǎàa", \A "AĀÁǍÀA"
26 | \o "oōóǒòo", \O "OŌÓǑÒO"
27 | \e "eēéěèe", \E "EĒÉĚÈE"
28 | \u "uūúǔùu", \U "UŪÚǓÙU"
29 | \i "iīíǐìi", \I "IĪÍǏÌI"
30 | \ü "üǖǘǚǜü", \Ü "ÜǕǗǙǛÜ"})
31 |
32 | ;; m is a super rare, special case final
33 | ;; the vec is index-aligned like the diacritics above (skip 6 for upper case)
34 | ;; note: the diacriticised versions are multi-char and may ruin formatting!
35 | (def m-diacritics
36 | ["m" "m̄" "ḿ" "m̌" "m̀" "m"
37 | "M" "M̄" "Ḿ" "M̌" "M̀" "M"])
38 |
39 | ;; only used to search and replace diacritics
40 | ;; also handles special case diacritic char, m
41 | (def diacritic-patterns
42 | {"a" #"[āáǎà]", "A" #"[ĀÁǍÀ]"
43 | "o" #"[ōóǒò]", "O" #"[ŌÓǑÒ]"
44 | "e" #"[ēéěè]", "E" #"[ĒÉĚÈ]"
45 | "u" #"[ūúǔù]", "U" #"[ŪÚǓÙ]"
46 | "i" #"[īíǐì]", "I" #"[ĪÍǏÌ]"
47 | "ü" #"[ǖǘǚǜ]", "Ü" #"[ǕǗǙǛ]"
48 | "m" #"(m̄|ḿ|m̌|m̀)" "M" #"(M̄|Ḿ|M̌|M̀)"})
49 |
50 | ;; used to match diacritics to tones in diacritics->digits
51 | (def tone-diacritics
52 | {1 #"(ā|ō|ē|ū|ī|ǖ|m̄|Ā|Ō|Ē|Ū|Ī|Ǖ|M̄)"
53 | 2 #"(á|ó|é|ú|í|ǘ|ḿ|Á|Ó|É|Ú|Í|Ǘ|Ḿ)"
54 | 3 #"(ǎ|ǒ|ě|ǔ|ǐ|ǚ|m̌|Ǎ|Ǒ|Ě|Ǔ|Ǐ|Ǚ|M̌)"
55 | 4 #"(à|ò|è|ù|ì|ǜ|m̀|À|Ò|È|Ù|Ì|Ǜ|M̀)"})
56 |
57 | ;; adapted from http://pinyin.info/rules/initials_finals.html
58 | ;; some non-standard syllables have been added: fiao, lo, m, r, sei, yo
59 | (def syllables
60 | #{"a" "ai" "an" "ang" "ao"
61 |
62 | "ba" "bai" "ban" "bang" "bao" "bei" "ben" "beng" "bi" "bian" "biao" "bie"
63 | "bin" "bing" "bo" "bu"
64 |
65 | "ca" "cai" "can" "cang" "cao" "ce" "cen" "ceng" "cha" "chai" "chan" "chang"
66 | "chao" "che" "chen" "cheng" "chi" "chong" "chou" "chu" "chua" "chuai"
67 | "chuan" "chuang" "chui" "chun" "chuo" "ci" "cong" "cou" "cu" "cuan" "cui"
68 | "cun" "cuo"
69 |
70 | "da" "dai" "dan" "dang" "dao" "de" "dei" "den" "deng" "di" "dia" "dian"
71 | "diao" "die" "ding" "diu" "dong" "dou" "du" "duan" "dui" "dun" "duo"
72 |
73 | "e" "ei" "en" "eng" "er"
74 |
75 | "fa" "fan" "fang" "fei" "fen" "feng" "fiao" "fo" "fou" "fu"
76 |
77 | "ga" "gai" "gan" "gang" "gao" "ge" "gei" "gen" "geng" "gong" "gou" "gu"
78 | "gua" "guai" "guan" "guang" "gui" "gun" "guo"
79 |
80 | "ha" "hai" "han" "hang" "hao" "he" "hei" "hen" "heng" "hong" "hou" "hu"
81 | "hua" "huai" "huan" "huang" "hui" "hun" "huo"
82 |
83 | "ji" "jia" "jian" "jiang" "jiao" "jie" "jin" "jing" "jiong" "jiu" "ju"
84 | "juan" "jue" "jun"
85 |
86 | "ka" "kai" "kan" "kang" "kao" "ke" "kei" "ken" "keng" "kong" "kou" "ku"
87 | "kua" "kuai" "kuan" "kuang" "kui" "kun" "kuo"
88 |
89 | "la" "lai" "lan" "lang" "lao" "le" "lei" "leng" "li" "lia" "lian" "liang"
90 | "liao" "lie" "lin" "ling" "liu" "lo" "long" "lou" "lu" "luan" "lun" "luo"
91 | "lü" "lüe"
92 |
93 | "m" "ma" "mai" "man" "mang" "mao" "me" "mei" "men" "meng" "mi" "mian" "miao"
94 | "mie" "min" "ming" "miu" "mo" "mou" "mu"
95 |
96 | "na" "nai" "nan" "nang" "nao" "ne" "nei" "nen" "neng" "ni" "nian" "niang"
97 | "niao" "nie" "nin" "ning" "niu" "nong" "nou" "nu" "nuan" "nun" "nuo" "nü"
98 | "nüe"
99 |
100 | "o" "ou"
101 |
102 | "pa" "pai" "pan" "pang" "pao" "pei" "pen" "peng" "pi" "pian" "piao" "pie"
103 | "pin" "ping" "po" "pou" "pu"
104 |
105 | "qi" "qia" "qian" "qiang" "qiao" "qie" "qin" "qing" "qiong" "qiu" "qu"
106 | "quan" "que" "qun"
107 |
108 | "r" "ran" "rang" "rao" "re" "ren" "reng" "ri" "rong" "rou" "ru" "rua" "ruan"
109 | "rui" "run" "ruo"
110 |
111 | "sa" "sai" "san" "sang" "sao" "se" "sei" "sen" "seng" "sha" "shai" "shan"
112 | "shang" "shao" "she" "shei" "shen" "sheng" "shi" "shou" "shu" "shua" "shuai"
113 | "shuan" "shuang" "shui" "shun" "shuo" "si" "song" "sou" "su" "suan" "sui"
114 | "sun" "suo"
115 |
116 | "ta" "tai" "tan" "tang" "tao" "te" "tei" "teng" "ti" "tian" "tiao" "tie"
117 | "ting" "tong" "tou" "tu" "tuan" "tui" "tun" "tuo"
118 |
119 | "wa" "wai" "wan" "wang" "wei" "wen" "weng" "wo" "wu"
120 |
121 | "xi" "xia" "xian" "xiang" "xiao" "xie" "xin" "xing" "xiong" "xiu" "xu"
122 | "xuan" "xun" "xue"
123 |
124 | "ya" "yan" "yang" "yao" "ye" "yi" "yin" "ying" "yo" "yong" "you" "yu" "yuan"
125 | "yun" "yue"
126 |
127 | "za" "zai" "zan" "zang" "zao" "ze" "zei" "zen" "zeng" "zha" "zhai" "zhan"
128 | "zhang" "zhao" "zhe" "zhei" "zhen" "zheng" "zhi" "zhong" "zhou" "zhu" "zhua"
129 | "zhuai" "zhuan" "zhuang" "zhui" "zhun" "zhuo" "zi" "zong" "zou" "zu" "zuan"
130 | "zui" "zun" "zuo"})
131 |
132 | (def decomposition-symbols
133 | "Unicode range for the so-called 'Ideographic Description Characters'.
134 | They are used for decomposition of Hanzi."
135 | #"\u2FF0-\u2FFF") ; NOTE(review): no enclosing [], so on its own this is not a character class — presumably meant to be embedded in one, like the hanzi-unicode ranges; confirm
136 |
137 | ;; from http://kourge.net/projects/regexp-unicode-block
138 | (def hanzi-unicode
139 | {"CJK Radicals Supplement" #"\u2E80-\u2EFF"
140 | "Kangxi Radicals" #"\u2F00-\u2FDF"
141 | "CJK Symbols and Punctuation" #"\u3000-\u303F"
142 | "CJK Strokes" #"\u31C0-\u31EF"
143 | "Enclosed CJK Letters and Months" #"\u3200-\u32FF"
144 | "CJK Compatibility" #"\u3300-\u33FF"
145 | "CJK Unified Ideographs Extension A" #"\u3400-\u4DBF"
146 | "Yijing Hexagram Symbols" #"\u4DC0-\u4DFF"
147 | "CJK Unified Ideographs" #"\u4E00-\u9FFF"
148 | "CJK Compatibility Ideographs" #"\uF900-\uFAFF"
149 | "CJK Compatibility Forms" #"\uFE30-\uFE4F"})
150 |
--------------------------------------------------------------------------------
/src/sinostudy/pinyin/eval.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.pinyin.eval
2 | (:require #?(:clj [clojure.spec.alpha :as spec]
3 | :cljs [cljs.spec.alpha :as spec])
4 | [clojure.string :as str]
5 | [sinostudy.pinyin.core :as p]
6 | [sinostudy.pinyin.patterns :as patterns]))
7 |
8 | (defn pinyin-syllable?
9 | "Is this a single Pinyin syllable (no digits or diacritics allowed)? Returns the re-matches result (truthy) or nil."
10 | [s]
11 | (re-matches patterns/pinyin-syllable s))
12 |
13 | (defn pinyin-block?
14 |   "Is this a plain block of Pinyin (no digits or diacritics allowed)?
15 |   Also checks string in reverse to prevent false negatives, e.g. hanguo."
16 |   [s]
17 |   (or (re-matches patterns/pinyin-block s)
18 |       (->> s reverse (apply str) (re-matches patterns/pinyin-rev-block))))
19 |
20 | ;; TODO: does this need to be changed similar to pinyin-block?
21 | (defn pinyin+punct?
22 | "Is this a sentence containing Pinyin without any tone digits or diacritics? Returns the re-matches result (truthy) or nil."
23 | [s]
24 | (re-matches patterns/pinyin+punct s)) ; unlike pinyin-block?, there is no reverse check here
25 |
26 | (defn pinyin-block+digits?
27 | "Is this a block of Pinyin with tone digits? Returns the re-matches result (truthy) or nil."
28 | [s]
29 | (re-matches patterns/pinyin+digits s))
30 |
31 | (defn pinyin+digits+punct?
32 | "Is this a sentence containing Pinyin with tone digits? Returns the re-matches result (truthy) or nil."
33 | [s]
34 | (re-matches patterns/pinyin+digits+punct s))
35 |
36 | (defn pinyin-block+diacritics?
37 | "Is this a block of Pinyin with tone diacritics?
38 | Note that this function does not validate the *placement* of diacritics!"
39 | [s]
40 | (pinyin-block? (p/no-diacritics s))) ; the diacritics are stripped first, then s is checked as plain Pinyin
41 |
42 | (defn pinyin+diacritics+punct?
43 | "Is this a sentence containing Pinyin with tone diacritics?
44 | Note that this function does not validate the *placement* of diacritics!"
45 | [s]
46 | (pinyin+punct? (p/no-diacritics s))) ; the diacritics are stripped first, then s is checked as plain Pinyin
47 |
48 | (defn hanzi-block?
49 | [s]
50 | (re-matches patterns/hanzi-block s)) ; truthy only when all of s matches the hanzi-block pattern
51 |
52 | (spec/def ::pinyin-syllable pinyin-syllable?)
53 |
54 | (spec/def ::pinyin-block pinyin-block?)
55 |
56 | (spec/def ::pinyin-block+digits pinyin-block+digits?)
57 |
58 | (spec/def ::pinyin+digits+punct pinyin+digits+punct?)
59 |
60 | (spec/def ::pinyin-block+diacritics pinyin-block+diacritics?)
61 |
62 | (spec/def ::pinyin+diacritics+punct pinyin+diacritics+punct?)
63 |
64 | (spec/def ::hanzi-block hanzi-block?)
65 |
--------------------------------------------------------------------------------
/src/sinostudy/pinyin/patterns.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.pinyin.patterns
2 | (:require [clojure.string :as str]
3 | [sinostudy.pinyin.data :as data]))
4 |
5 | ;; reverse-sorting the list of syllables prevents eager resolution in JS regex
6 | ;; otherwise syllables like "wang" will not match (they eagerly resolve to "wa")
7 | (def rev-syllables
8 |   (sort #(compare %2 %1) data/syllables))
9 |
10 | ;; This crazy concoction is used to validate Pinyin such as "hanguo".
11 | ;; If only checking front to back, it's read as "hang" + "uo", i.e. invalid.
12 | ;; By also validating the block in reverse, we get around this issue.
13 | (def rev-rev-syllables
14 |   (->> (sort data/syllables) (map #(apply str (reverse %))) reverse))
15 |
16 | (def syllable
17 | (str "(" (str/join "|" rev-syllables) ")")) ; one group alternating over all syllables
18 |
19 | (def rev-syllable
20 | (str "(" (str/join "|" rev-rev-syllables) ")")) ; the same, for the reversed syllables
21 |
22 | (def syllable+digit
23 | (str "((" (str/join "|" rev-syllables) ")[012345]?)")) ; a syllable with an optional tone digit 0-5
24 |
25 | ;; One or more syllables followed by any number of syllables that may each be
26 | ;; preceded by an apostrophe.
27 | (def block
28 |   (str "(" syllable "+" "('?" syllable ")*" ")"))
29 |
30 | ;; One or more reversed syllables followed by any number of reversed syllables
31 | ;; that may each be preceded by an apostrophe.
32 | (def rev-block
33 |   (str "(" rev-syllable "+" "('?" rev-syllable ")*" ")"))
34 |
35 | ;; One or more syllables with optional tone digits, followed by any number of
36 | ;; such syllables that may each be preceded by an apostrophe.
37 | (def block+digit
38 |   (str "(" syllable+digit "+" "('?" syllable+digit ")*" ")"))
39 |
40 | ;; note: technically matches non-Latin, e.g. also matches hanzi
41 | (def punct
42 | "[^\\w]+")
43 |
44 | (def pinyin-syllable
45 | (re-pattern (str "(?i)" syllable))) ; (?i) makes this and the patterns below case-insensitive
46 |
47 | (def pinyin-block
48 | (re-pattern (str "(?i)" block)))
49 |
50 | (def pinyin-rev-block
51 | (re-pattern (str "(?i)" rev-block)))
52 |
53 | (def pinyin+punct
54 | (re-pattern (str "(?i)" block "(" block "|" punct ")*"))) ; must start with a block; blocks and punctuation may follow
55 |
56 | (def pinyin+digits
57 | (re-pattern (str "(?i)" block+digit)))
58 |
59 | (def pinyin+digits+punct
60 | (re-pattern (str "(?i)" block+digit "(" block+digit "|" punct ")*"))) ; as pinyin+punct, with optional tone digits
61 |
62 | ;; Joins the bracket-less Unicode ranges into a single character class. In
63 | ;; ClojureScript (str regex) yields "/.../", so the pattern source is used
64 | ;; there to keep the slashes out of the character class.
65 | (def hanzi-block
66 |   (let [source #?(:clj str :cljs #(.-source %))]
67 |     (re-pattern (str "[" (str/join (map source (vals data/hanzi-unicode))) "]+"))))
64 |
--------------------------------------------------------------------------------
/src/sinostudy/rim/core.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.rim.core
2 | (:require [clojure.string :as str]))
3 |
4 | ;;;; TEXT-RELATED FUNCTIONS
5 |
6 | ;; based on code examples from StackOverflow:
7 | ;; https://stackoverflow.com/questions/3262195/compact-clojure-code-for-regular-expression-matches-and-their-position-in-string
8 | ;; https://stackoverflow.com/questions/18735665/how-can-i-get-the-positions-of-regex-matches-in-clojurescript
9 | (defn re-pos
10 |   "Like re-seq, but returns a map of indexes to matches (sorted by index), not a seq."
11 |   [re s]
12 |   #?(:clj  (loop [out (sorted-map) ; sorted, so match order survives any number of matches
13 |                   m   (re-matcher re s)]
14 |              (if (.find m)
15 |                (recur (assoc out (.start m) (.group m)) m)
16 |                out))
17 |      :cljs (let [flags (fn [re]
18 |                          (let [m? (.-multiline re)
19 |                                i? (.-ignoreCase re)]
20 |                            (str "g" (when m? "m") (when i? "i")))) ; "g" lets .exec advance through s
21 |                  re    (js/RegExp. (.-source re) (flags re))]
22 |              (loop [out (sorted-map)]
23 |                (if-let [m (.exec re s)]
24 |                  (recur (assoc out (.-index m) (first m)))
25 |                  out)))))
26 |
27 | (defn- re-handle*
28 | "Helper function for re-handle. Only takes strings. Returns s itself when re does not match, otherwise a seq of the unmatched pieces interleaved with f applied to each match."
29 | [s re f]
30 | (let [matches (re-seq re s)]
31 | (if (empty? matches)
32 | s
33 | (let [others (str/split s re)
34 | ;; Dealing with weird behaviour present in Java/JS implementations
35 | ;; causing empty strings as the first split result.
36 | others* (if (= "" (first others))
37 | (rest others)
38 | others)
39 | results (map f matches)
40 | [c1 c2] (if (str/starts-with? s (first matches)) ; whichever kind comes first in s goes first
41 | [results others*]
42 | [others* results])
43 | c3 (if (> (count c1) (count c2)) ; leftover items of the longer seq, which interleave would drop
44 | (subvec (vec c1) (count c2))
45 | (subvec (vec c2) (count c1)))]
46 | (concat (vec (interleave c1 c2)) c3)))))
47 |
48 | (defn re-handle
49 |   "Split s based on re and reinsert the matches of re in s with f applied.
50 |   If s is sequential, then will apply f to matches inside any strings in s.
51 |   Note: can be chained -- very useful for creating hiccup data out of a string."
52 |   [s re f]
53 |   (if-not (sequential? s)
54 |     (re-handle* s re f)
55 |     (for [item s] (if (string? item) (re-handle* item re f) item))))
56 |
--------------------------------------------------------------------------------
/src/sinostudy/spec/dictionary.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.spec.dictionary
2 | "Contains all specs pertaining to dictionary entries and search results."
3 | (:require [clojure.spec.alpha :as s]
4 | [clojure.string :as str]))
5 |
6 | ;; There's no built-in predicate for this.
7 | (s/def ::non-blank-string
8 | (s/and string? ; checked first, so str/blank? only ever sees strings
9 | (complement str/blank?)))
10 |
11 | ;; TODO: expand on this
12 | (s/def ::hanzi
13 | ::non-blank-string)
14 |
15 | ;; TODO: expand on this
16 | (s/def ::pinyin+digits
17 | ::non-blank-string)
18 |
19 | (s/def ::term
20 | ::non-blank-string)
21 |
22 | (s/def ::script
23 | #{:simplified
24 | :traditional})
25 |
26 | (s/def ::scripts
27 | (s/coll-of ::script :kind set? :into #{}))
28 |
29 | (s/def ::definition
30 | ::non-blank-string)
31 |
32 | (s/def ::definitions
33 | (s/coll-of ::definition :kind set? :into #{}))
34 |
35 | (s/def ::uses
36 | (s/map-of ::pinyin+digits ::definitions))
37 |
38 | (s/def ::variations
39 | (s/map-of ::script (s/coll-of ::hanzi :kind set? :into #{})))
40 |
41 | (s/def :classifier/traditional
42 | ::hanzi)
43 |
44 | (s/def :classifier/simplified
45 | ::hanzi)
46 |
47 | (s/def :classifier/pinyin ; any collection of ::pinyin+digits strings (no :kind given)
48 | (s/coll-of ::pinyin+digits))
49 |
50 | (s/def ::classifier ; map that must have the unqualified keys :traditional, :simplified and :pinyin
51 | (s/keys :req-un [:classifier/traditional
52 | :classifier/simplified
53 | :classifier/pinyin]))
54 |
55 | (s/def ::classifiers ; must be a set of ::classifier maps
56 | (s/coll-of ::classifier :kind set? :into #{}))
57 |
58 | ;; TODO: expand on this
59 | (s/def ::decomposition ; any string, blank ones included
60 | string?)
61 |
62 | (s/def ::frequency ; a double between 0 and 1
63 | (s/double-in :min 0
64 | :max 1))
65 |
66 | (s/def ::radical
67 | ::hanzi)
68 |
69 | (s/def ::type ; the accepted values are strings, not keywords
70 | #{"ideographic"
71 | "pictographic"
72 | "pictophonetic"})
73 |
74 | (s/def ::phonetic
75 | ::hanzi)
76 |
77 | (s/def ::semantic
78 | ::hanzi)
79 |
80 | (s/def ::hint ; any string, blank ones included
81 | string?)
82 |
83 | ;; See: https://www.skishore.me/makemeahanzi/
84 | (s/def ::etymology ; only :type is mandatory
85 | (s/keys :req-un [::type]
86 | :opt-un [::phonetic
87 | ::semantic
88 | ::hint]))
89 |
90 | (s/def ::entry ; NOTE(review): ::decomposition has a spec in this file but is not listed as a key here -- confirm that is intended
91 | (s/keys :req-un [::term
92 | ::scripts
93 | ::uses]
94 | :opt-un [::radical
95 | ::frequency
96 | ::variations
97 | ::classifiers
98 | ::etymology]))
99 |
100 | (s/def :search-result/hanzi ; a single ::entry, unlike the collections below
101 | ::entry)
102 |
103 | (s/def :search-result/pinyin
104 | (s/coll-of ::entry))
105 |
106 | (s/def :search-result/pinyin+digits
107 | (s/coll-of ::entry))
108 |
109 | (s/def :search-result/pinyin+diacritics
110 | (s/coll-of ::entry))
111 |
112 | (s/def :search-result/english
113 | (s/coll-of ::entry))
114 |
115 | (s/def ::search-result ; only :term is mandatory; every result type key is optional
116 | (s/keys :req-un [::term]
117 | :opt-un [:search-result/hanzi
118 | :search-result/pinyin
119 | :search-result/pinyin+digits
120 | :search-result/pinyin+diacritics
121 | :search-result/english]))
122 |
--------------------------------------------------------------------------------
/src/sinostudy/spec/pages.cljc:
--------------------------------------------------------------------------------
1 | (ns sinostudy.spec.pages
2 | (:require [clojure.spec.alpha :as s]))
3 |
4 | (s/def ::category ; NOTE(review): subs and views compare categories against ::pages/terms and ::pages/static -- confirm :term/:static is still right
5 | #{:term :static})
6 |
7 | (s/def ::page ; a [category string] pair; NOTE(review): the subs also read a third element via (get page 2) -- confirm
8 | (s/tuple ::category string?))
9 |
--------------------------------------------------------------------------------
/src/sinostudy/subs.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.subs
2 | (:require [re-frame.core :as rf]
3 | [sinostudy.navigation.pages :as pages]))
4 |
5 | (rf/reg-sub
6 |   ::input
7 |   (fn [{:keys [input]}]
8 |     input))
9 |
10 | (rf/reg-sub
11 |   ::pages
12 |   (fn [{:keys [pages]}]
13 |     pages))
14 |
15 | (rf/reg-sub
16 |   ::unknown-queries
17 |   (fn [{:keys [unknown-queries]}]
18 |     unknown-queries))
19 |
20 | (rf/reg-sub
21 |   ::history
22 |   (fn [{:keys [history]}]
23 |     history))
24 |
25 | (rf/reg-sub
26 |   ::queries
27 |   (fn [{:keys [queries]}]
28 |     queries))
29 |
30 | (rf/reg-sub
31 |   ::script
32 |   (fn [{:keys [script]}]
33 |     script))
34 |
35 | (rf/reg-sub
36 |   ::mode
37 |   (fn [{:keys [mode]}]
38 |     mode))
39 |
40 | (rf/reg-sub
41 |   ::actions
42 |   (fn [{:keys [actions]}]
43 |     actions))
44 |
45 | (rf/reg-sub
46 |   ::checked-action
47 |   (fn [{:keys [checked-action]}]
48 |     checked-action))
49 |
50 | (rf/reg-sub
51 |   ::current-evaluation
52 |   (fn [{:keys [evaluations]}]
53 |     (first evaluations)))
54 |
55 | (rf/reg-sub
56 |   ::current-query
57 |   ;; The :query of the current evaluation.
58 |   :<- [::current-evaluation]
59 |   (fn [evaluation _]
60 |     (get evaluation :query)))
61 |
62 | (rf/reg-sub
63 |   ::current-page
64 |   ;; The page at the front of the history.
65 |   :<- [::history]
66 |   (fn [history _]
67 |     (first history)))
68 |
69 | (rf/reg-sub
70 |   ::current-category
71 |   ;; First element of the current page.
72 |   :<- [::current-page]
73 |   (fn [page _]
74 |     (first page)))
75 |
76 | (rf/reg-sub
77 |   ::current-id
78 |   ;; Second element of the current page.
79 |   :<- [::current-page]
80 |   (fn [page _]
81 |     (second page)))
82 |
83 | (rf/reg-sub
84 |   ::current-attribute
85 |   ;; Third element of the current page, if there is one.
86 |   :<- [::current-page]
87 |   (fn [page _]
88 |     (get page 2)))
89 |
90 | (rf/reg-sub
91 |   ::content
92 |   :<- [::pages]
93 |   :<- [::current-page]
94 |   ;; Content is stored in :pages under the shortened form of the page.
95 |   (fn [[pages page]]
96 |     (when page
97 |       (get-in pages (pages/shortened page)))))
98 |
99 | ;; The result filters are stored in a map; lookups use the search term as key.
100 | (rf/reg-sub
101 |   ::result-filters
102 |   (fn [{:keys [result-filters]}]
103 |     result-filters))
104 |
105 | (rf/reg-sub
106 |   ::current-result-types
107 |   :<- [::current-category]
108 |   :<- [::content]
109 |   ;; Every key of search result content except :term is a result type.
110 |   ;; Content that has a :uses key is a full entry and has no result types.
111 |   (fn [[category content]]
112 |     (let [search-results? (and (= ::pages/terms category)
113 |                                (not (contains? content :uses)))]
114 |       (when search-results?
115 |         (sort (remove #{:term} (keys content)))))))
116 |
117 | (rf/reg-sub
118 |   ::current-result-filter
119 |   (fn [_]
120 |     [(rf/subscribe [::current-category])
121 |      (rf/subscribe [::content])
122 |      (rf/subscribe [::result-filters])
123 |      (rf/subscribe [::current-result-types])])
124 |   ;; Yields the filter stored for this search term; otherwise falls back to the
125 |   ;; result type with the most entries. Nil when there are no result types.
126 |   (fn [[category {search-term :term :as content} result-filters result-types]]
127 |     (when (= category ::pages/terms)
128 |       (or (get result-filters search-term)
129 |           ;; max-key needs at least one candidate, so guard the empty case.
130 |           (when (seq result-types)
131 |             (apply max-key (comp count (partial get content))
132 |                    result-types))))))
133 |
134 | ;; The nav link that is currently active, i.e. the key of a static page.
135 | ;; Used to determine which top-level link to disable.
136 | (rf/reg-sub
137 |   ::current-nav
138 |   :<- [::current-page]
139 |   (fn [[category id]]
140 |     (when (= ::pages/static category)
141 |       id)))
142 |
--------------------------------------------------------------------------------
/src/sinostudy/views/common.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.views.common
2 | (:require [sinostudy.navigation.pages :as pages]
3 | [sinostudy.events.scrolling :as scrolling]
4 | [sinostudy.pinyin.eval :as pe]
5 | [sinostudy.rim.core :as rim]
6 | [sinostudy.dictionary.embed :as embed]
7 | [sinostudy.pinyin.core :as p]
8 | [clojure.string :as str]
9 | [re-frame.core :as rf]))
10 |
11 | ;; The on-click handler that dispatches an event to reset the scroll state
12 | ;; is a necessity, given that it is currently not possible to distinguish
13 | ;; between back/forward button navigation events and clicking links.
14 | ;; Obviously, clicking a link should never result in a restored scroll state.
15 | ;; Similarly, some queries (e.g. look-ups) also manually reset the scroll state.
16 | (defn link-term
17 |   "Wrap every term in text in a link to its dictionary look-up.
18 |   A string is linked character by character,
19 |   while a collection (e.g. hiccup) is linked item by item."
20 |   [text]
21 |   (map-indexed
22 |     (fn [idx term]
23 |       [:a
24 |        {:key      (str term "-" idx)
25 |         :title    (str "Look up " term)
26 |         :href     (str "/" (name ::pages/terms) "/" term)
27 |         :on-click #(rf/dispatch [::scrolling/reset-scroll-state
28 |                                  [::pages/terms term]])}
29 |        term])
30 |     text))
31 |
32 | (defn hanzi-link
33 |   "Turn text into look-up links when it is a block of Hanzi; else return it as is."
34 |   [text]
35 |   (cond-> text
36 |     (pe/hanzi-block? text)
37 |     link-term))
38 |
39 | (defn refr->m
40 |   "Parse an embedded reference string into a map with the keys
41 |   :traditional, :simplified and :pinyin."
42 |   [refr]
43 |   (let [[hanzi-part pinyin-part] (str/split refr #"\[|\]")
44 |         [traditional simplified] (str/split hanzi-part #"\|")
45 |         syllables                (str/split pinyin-part #" ")]
46 |     {:traditional traditional
47 |      ;; With no second form after the |, the first form
48 |      ;; stands in for the simplified one too.
49 |      :simplified  (or simplified traditional)
50 |      :pinyin      (map p/digits->diacritics syllables)}))
51 |
52 | (defn zh
53 |   "Return the Chinese lang attribute for script; plain \"zh\" for anything else."
54 |   [script]
55 |   (get {:traditional "zh-Hant"
56 |         :simplified  "zh-Hans"}
57 |        script
58 |        "zh"))
59 |
60 | (defn- handle-ref
61 | "Handle s with f in the given script if s is a reference."
62 | [script f s]
63 | (let [zh (zh script) ; from here on the local zh (a lang attribute value) shadows the zh fn
64 | use-script (fn [coll] ; picks the form matching script out of a split "a|b" vector
65 | (get coll (cond
66 | (= (count coll) 1) 0 ; only one form present: it serves either script
67 | (= script :simplified) 1 ; the second form is the simplified one
68 | :else 0)))] ; anything else falls back to the first form
69 | (cond
70 | (re-matches embed/refr s) (let [m (refr->m s) ; s is a full embedded reference
71 | pinyin (->> (:pinyin m)
72 | (map f)
73 | (interpose " "))
74 | hanzi (script m)] ; NOTE(review): script is called as a fn here, so it has to be a keyword found in m
75 | [:span {:key hanzi}
76 | [:span {:lang zh}
77 | (f hanzi)]
78 | [:span.pinyin
79 | pinyin]])
80 |
81 | (re-matches embed/hanzi s) (let [hanzi (-> s
82 | (str/split #"\|")
83 | (use-script))]
84 | [:span {:lang zh :key hanzi}
85 | (f hanzi)])
86 |
87 | (pe/hanzi-block? s) [:span {:lang zh
88 | :key s}
89 | (f s)]
90 |
91 | (re-matches embed/pinyin s) (let [pinyin (-> s
92 | (subs 1 (dec (count s))) ; drops the first and the last character of s
93 | (str/split #" "))]
94 | [:span.pinyin {:key s}
95 | (interpose " " (map f pinyin))])
96 |
97 | ;; TODO: don't link numbers? i.e. 118 in "Kangxi radical 118"
98 | :else (f s)))) ; not a reference: f is applied to s as it is
99 |
100 | (defn handle-refs
101 |   "Link and style every reference to a dictionary entry found in s, using f.
102 |   Script is the preferred script, i.e. traditional or simplified."
103 |   [script f s]
104 |   ;; The pattern has three alternatives, tried in this order:
105 |   ;;   1. a full embedded ref, i.e. non-whitespace directly followed by [...]
106 |   ;;   2. embedded pinyin, i.e. a [...] on its own
107 |   ;;   3. any remaining word in English or Chinese
108 |   (let [token #"[^\s]+\[[^\]]+\]|\[[^\]]+\]|[^,.;'\"`´+?&()#%\s]+"]
109 |     (rim/re-handle s token (partial handle-ref script f))))
110 |
--------------------------------------------------------------------------------
/src/sinostudy/views/core.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.views.core
2 | (:require [re-frame.core :as rf]
3 | [reagent.core :as reagent]
4 | [clojure.string :as str]
5 | [cljs.reader :as reader]
6 | [sinostudy.db :as db]
7 | [sinostudy.subs :as subs]
8 | [sinostudy.events.core :as events]
9 | [sinostudy.events.scrolling :as scrolling]
10 | [sinostudy.events.actions :as actions]
11 | [sinostudy.views.dictionary :as vd]
12 | [sinostudy.navigation.pages :as pages])
13 | (:require-macros [sinostudy.macros.core :as macros]))
14 |
15 | ;;;; HELPER FUNCTIONS
16 |
17 | (defn navlink
18 |   "A nav link to `to`; rendered without href when `from` equals `to`."
19 |   [from to text]
20 |   (let [attrs {:key (str from "->" to)}]
21 |     (if (not= from to)
22 |       [:a
23 |        (assoc attrs
24 |          :href to
25 |          :on-click #(rf/dispatch [::scrolling/reset-scroll-state
26 |                                   [::pages/static to]]))
27 |        text]
28 |       ;; The page we are already on only gets a styled placeholder.
29 |       [:a.current-page attrs text])))
30 |
31 | (defn navify [from links]
32 |   (for [[to text] links] (navlink from to text)))
33 |
34 |
35 | ;;;; VIEWS
36 |
37 | (defn smart-input
38 |   "The input field (part of the header form)."
39 |   []
40 |   (let [input           @(rf/subscribe [::subs/input])
41 |         actions         @(rf/subscribe [::subs/actions])
42 |         unknown-queries @(rf/subscribe [::subs/unknown-queries])
43 |         unknown-query?  (when input
44 |                           (contains? unknown-queries (str/trim input)))]
45 |     [:<>
46 |      [:div#header-input
47 |       [:input#input-field
48 |        {:type            "text"
49 |         :class           (when unknown-query? "unknown")
50 |         :placeholder     "look up..."
51 |         :auto-capitalize "off"
52 |         :auto-correct    "off"
53 |         :auto-complete   "off"
54 |         :spell-check     false
55 |         :disabled        (some? actions) ; locked while actions are on offer
56 |         :value           input
57 |         :on-change       (fn [e]
58 |                            (when (nil? actions)
59 |                              (rf/dispatch [::events/on-input-change
60 |                                            (-> e .-target .-value)])))}]
61 |
62 |       ;; The button is not actually displayed! It exists so that pressing "Enter"
63 |       ;; triggers its on-click handler rather than a native form submission to an
64 |       ;; unknown href (which could also misfire while picking an action in the
65 |       ;; action-chooser). The handler cancels the default and dispatches ::events/submit.
66 |       [:button
67 |        {:type     "submit"
68 |         :on-click (fn [e]
69 |                     (.preventDefault e)
70 |                     (rf/dispatch [::events/submit input]))}
71 |        "go"]]]))
72 |
73 | (defn filters
74 |   "Radio buttons for picking which type of dictionary search result is shown."
75 |   []
76 |   (let [{search-term :term} @(rf/subscribe [::subs/content])
77 |         active       @(rf/subscribe [::subs/current-result-filter])
78 |         result-types @(rf/subscribe [::subs/current-result-types])
79 |         ;; With fewer than two result types there is nothing to pick between.
80 |         hidden?      (< (count result-types) 2)]
81 |     [:div#filters
82 |      {:class (when hidden? "hidden")}
83 |      (->> result-types
84 |           (map (fn [result-type]
85 |                  (let [label (str/capitalize (name result-type))]
86 |                    [:span {:key result-type}
87 |                     [:input {:id        result-type
88 |                              :type      "radio"
89 |                              :name      "result-filter"
90 |                              :value     result-type
91 |                              :checked   (= active result-type)
92 |                              :on-change (fn [_]
93 |                                           (rf/dispatch [::events/set-result-filter
94 |                                                         search-term result-type]))}]
95 |                     [:label {:for   result-type
96 |                              :title (str "View " label " results")}
97 |                      label]])))
98 |           (interpose " · "))]))
99 |
100 | (defn header
101 |   "The page header: the input form, an optional title line and the result filters."
102 |   []
103 |   (let [[_ id :as page] @(rf/subscribe [::subs/current-page])
104 |         input           @(rf/subscribe [::subs/input])
105 |         ;; No title when the input already equals the id of the page.
106 |         title           (when (not= input id) (events/mk-input page))]
107 |     [:header
108 |      [:div#aligner
109 |       [:form {:auto-complete "off"}
110 |        [smart-input]
111 |        (when title [:p#title "↓ " [:em title] " ↓"])
112 |        [filters]]]]))
113 |
114 | (defn main
115 |   "The content pane of the site."
116 |   []
117 |   (reagent/create-class
118 |     {:display-name "main"
119 |
120 |      ;; Static pages show their stored content, or the 404 page if there is none;
121 |      ;; term pages are handed over to the dictionary view.
122 |      :reagent-render
123 |      (fn []
124 |        (let [content @(rf/subscribe [::subs/content])]
125 |          (case @(rf/subscribe [::subs/current-category])
126 |            ::pages/static (or content (db/static-pages "/404"))
127 |            ::pages/terms [vd/dictionary-page]
128 |            nil)))
129 |
130 |      ;; Restores the scroll state after back/forward navigation. As this runs on
131 |      ;; every update, links etc. must dispatch ::scrolling/reset-scroll-state.
132 |      :component-did-update
133 |      (fn [_ _]
134 |        (rf/dispatch [::scrolling/load-scroll-state
135 |                      @(rf/subscribe [::subs/current-page])]))}))
136 |
137 | (defn script-changer
138 |   "The button used to toggle traditional/simplified Chinese script."
139 |   []
140 |   (let [script     @(rf/subscribe [::subs/script])
141 |         simpl?     (= :simplified script)
142 |         text       (if simpl? "Simpl." "Trad.")
143 |         ;; The script that a click switches to, i.e. the one not in use.
144 |         alt-script (if simpl? :traditional :simplified)
145 |         title      (str "Click to use "
146 |                         (if simpl?
147 |                           "traditional characters"
148 |                           "simplified characters"))]
149 |     [:a#script-changer
150 |      {:key      alt-script
151 |       :title    title
152 |       :on-click #(rf/dispatch [::events/change-script alt-script])}
153 |      text]))
154 |
155 | (defn footer
156 |   "The footer (contains navigation)."
157 |   []
158 |   (let [from  @(rf/subscribe [::subs/current-nav])
159 |         links [["/" "Home"] ["/about" "About"] ["/settings" "Settings"]]]
160 |     [:footer
161 |      [:nav (interpose " · " (conj (vec (navify from links))
162 |                                   [script-changer {:key "script-changer"}]))]]))
163 |
164 | (defn- action-text
165 |   [[action query]] ; the argument is an [action query] pair
166 |   (condp = action
167 |     ::events/look-up (str "Look up " query)
168 |     ::actions/digits->diacritics "Convert to diacritics"
169 |     ::actions/diacritics->digits "Convert to digits"
170 |     ::actions/close-action-chooser "Cancel"))
171 |
172 | (defn- action-choice
173 |   [checked action]
174 |   (letfn [(choose-action [e]
175 |             (.preventDefault e)
176 |             (rf/dispatch [::actions/choose-action action]))]
177 |     [:li {:key action}
178 |      [:input {:id        action
179 |               :type      :radio
180 |               :name      "action"
181 |               :value     action
182 |               :checked   (= checked action)
183 |               :on-change choose-action}]
184 |      ;; Clicking the label chooses the action as well.
185 |      [:label {:for action :on-click choose-action}
186 |       (action-text action)]]))
187 |
188 | (defn action-chooser
189 |   "The pop-in dialog that is used to select from different possible options."
190 |   []
191 |   (let [actions @(rf/subscribe [::subs/actions])
192 |         checked @(rf/subscribe [::subs/checked-action])]
193 |     (when actions
194 |       [:fieldset#actions
195 |        [:legend "Select an action"]
196 |        [:ol (map (partial action-choice (nth actions checked)) actions)]])))
197 |
198 | ;;; Project version based on git tag
199 | ;;; See: https://github.com/arrdem/lein-git-version
200 | (defn version-digest
201 |   "The :tag from resources/version.edn, linking to the project on GitHub."
202 |   [attr]
203 |   (let [{:keys [tag]} (-> "resources/version.edn" macros/slurp reader/read-string)]
204 |     [:address
205 |      attr
206 |      [:a {:href "https://github.com/simongray/sino.study"} tag]]))
207 |
208 | (defn app [] ; root view composing every top-level component
209 |   (let [not-home? (not= "/" @(rf/subscribe [::subs/current-nav]))]
210 |     [:<>
211 |      [action-chooser]
212 |      [header] ; header is declared with an empty parameter vector
213 |      [main]
214 |      [footer]
215 |      [version-digest (when not-home? {:class "hidden"})]])) ; hidden unless the nav is "/"
216 |
--------------------------------------------------------------------------------
/src/sinostudy/views/dictionary.cljs:
--------------------------------------------------------------------------------
1 | (ns sinostudy.views.dictionary
2 | (:require [clojure.string :as str]
3 | [re-frame.core :as rf]
4 | [sinostudy.dictionary.core :as d]
5 | [sinostudy.pinyin.core :as p]
6 | [sinostudy.views.common :as vc]
7 | [sinostudy.subs :as subs]
8 | [sinostudy.navigation.pages :as pages]))
9 |
10 | (defn entry-title
11 |   "The title of the term with links to characters -OR- decomposition
12 |   into components if the term is a character."
13 |   []
14 |   (let [lang      (vc/zh @(rf/subscribe [::subs/script]))
15 |         {:keys [term decomposition]} @(rf/subscribe [::subs/content])
16 |         attribute @(rf/subscribe [::subs/current-attribute])
17 |         ;; A decomposition of "?" is treated the same as having none at all.
18 |         parts     (when-not (= "?" decomposition)
19 |                     decomposition)]
20 |     (cond
21 |       ;; Several characters: each one links to its own entry.
22 |       (< 1 (count term))
23 |       [:h1 {:lang lang} (vc/link-term term)]
24 |
25 |       (= "decomposition" attribute)
26 |       [:h1 {:lang  lang
27 |             :title "Character decomposition"}
28 |        (map vc/hanzi-link parts)]
29 |
30 |       ;; A lone character that can be decomposed links to its decomposition.
31 |       parts
32 |       [:h1 {:lang  lang
33 |             :title "Click to decompose"}
34 |        [:a {:href (str "/" (name ::pages/terms) "/" term "/decomposition")}
35 |         term]]
36 |
37 |       ;; Nothing to decompose: a plain title.
38 |       :else
39 |       [:h1 {:lang  lang
40 |             :title term}
41 |        term])))
42 |
43 | ;; In certain cases, entries may include these "fake" definitions.
44 | ;; They're removed on the frontend since the variant may well be valid in
45 | ;; e.g. traditional Chinese, but not in simplified Chinese (see: 喂).
46 | (defn no-fake-variants
47 |   "Drops definitions of the pattern 'variant of _' that point back at the term itself."
48 |   [script term definitions]
49 |   (if-not (= 1 (count term))
50 |     definitions
51 |     (let [self-ref (str "variant of " term)
52 |           pattern  (if (= script :traditional)
53 |                      self-ref
54 |                      (str self-ref "\\[|variant of .\\|" term))]
55 |       (remove (partial re-find (re-pattern pattern)) definitions))))
56 |
57 | (defn usage-list
58 |   "List of definitions for each Pinyin variation of an entry."
59 |   []
60 |   (let [script @(rf/subscribe [::subs/script])
61 |         {term :term
62 |          uses :uses} @(rf/subscribe [::subs/content])
63 |         link (comp vc/link-term vector)]
64 |     [:section#usages
65 |      [:dl
66 |       (for [[pinyin definitions] uses]
67 |         (let [pinyin* (->> (str/split pinyin #" ")
68 |                            (map p/digits->diacritics)
69 |                            (map link)
70 |                            (interpose " "))]
71 |           ;; Keyed by the raw pinyin string: a plain, stable React key, unlike the
72 |           ;; lazy seq of hiccup in pinyin*.
73 |           [:<> {:key pinyin}
74 |            [:dt.pinyin pinyin*]
75 |            [:dd
76 |             [:ol
77 |              (for [definition (no-fake-variants script term (sort definitions))]
78 |                [:li {:key definition} (vc/handle-refs script link definition)])]]]))]]))
79 |
80 | (defn details-table
81 | "Additional information about the dictionary entry."
82 | []
83 | (let [script @(rf/subscribe [::subs/script])
84 | zh (vc/zh script)
85 | {term :term
86 | radical :radical
87 | frequency :frequency
88 | variations :variations
89 | classifiers :classifiers
90 | etymology :etymology} @(rf/subscribe [::subs/content])
91 | label (d/frequency-label frequency) ; any label other than :high/:medium/:low leaves the cell empty
92 | entry-script (cond ; the script whose variations get listed; :traditional wins if both keys exist
93 | (contains? variations :traditional) :traditional
94 | (contains? variations :simplified) :simplified)
95 | entry-zh (vc/zh entry-script)]
96 | [:section.details
97 | [:table
98 | [:tbody
99 | [:tr {:key :frequency ; this row is always rendered, the ones below are conditional
100 | :title "Word frequency"}
101 | [:td "Freq"]
102 | [:td (cond
103 | (= label :high) "frequent"
104 | (= label :medium) "average"
105 | (= label :low) "infrequent")]]
106 | (when entry-script ; nil unless variations has one of the two script keys
107 | [:tr {:key :variations
108 | :title (str (if (= :traditional entry-script)
109 | "In Traditional Chinese"
110 | "In Simplified Chinese"))}
111 | (if (= entry-script :traditional)
112 | [:td "Trad"]
113 | [:td "Simp"])
114 | [:td {:lang entry-zh}
115 | (interpose ", " (->> variations
116 | entry-script
117 | (map vector) ; wrapped so that link-term links each variation as a whole
118 | (map vc/link-term)
119 | (map (fn [variation]
120 | [:span {:key variation}
121 | variation]))))]])
122 | (when classifiers
123 | [:tr {:key :classifiers
124 | :title (str "Common classifiers")}
125 | [:td "Cl"]
126 | [:td
127 | (interpose ", "
128 | (for [classifier (sort-by :pinyin classifiers)]
129 | [:span
130 | {:lang zh
131 | :key (script classifier)} ; script doubles as the key into the classifier map
132 | (vc/link-term (vector (script classifier)))]))]])
133 | (when radical
134 | [:tr {:key :radical
135 | :title "Radical"}
136 | [:td "Rad"]
137 | (if (= term radical)
138 | [:td "The character is a radical"]
139 | [:td {:lang zh} (vc/link-term (vector radical))])])
140 | (when etymology
141 | (let [{type :type
142 | hint :hint
143 | semantic :semantic
144 | phonetic :phonetic} etymology]
145 | (when-let [etym (cond ; no row at all unless one of the two clauses below applies
146 | (and (or (= type "pictographic")
147 | (= type "ideographic")) hint)
148 | [:<> (let [link (comp vc/link-term vector)]
149 | (vc/handle-refs script link hint))]
150 |
151 | (and (= type "pictophonetic") semantic phonetic)
152 | [:<>
153 | [:span {:lang zh} (vc/link-term semantic)]
154 | " (" hint ") + " ; NOTE(review): hint is not checked in this clause, so it may be nil here
155 | [:span {:lang zh} (vc/link-term phonetic)]])]
156 | [:tr {:key :etymology
157 | :title "Etymology"}
158 | [:td "Hint"]
159 | [:td etym]])))]]]))
160 |
161 | (defn entry
162 |   "Full-page dictionary entry: the title followed by usages and details."
163 |   []
164 |   [:main
165 |    [:article.entry.full
166 |     [entry-title]
167 |     (into [:div.content]
168 |           [[usage-list]
169 |            [details-table]])]])
170 |
171 | (defn- result-entry-uses
172 |   "Listed uses of a search result entry."
173 |   [script search-term term uses]
174 |   (let [render (partial vc/handle-refs script identity)]
175 |     (for [[pronunciation definitions] uses
176 |           :let [all-defs      (no-fake-variants script term definitions)
177 |                 ;; Without a search term every definition counts as relevant.
178 |                 relevant-defs (if search-term
179 |                                 (d/defs-containing-term search-term all-defs)
180 |                                 all-defs)
181 |                 other-defs    (remove (set relevant-defs) all-defs)]]
182 |       ;; A use without relevant definitions yields nil rather than markup.
183 |       (when (seq relevant-defs)
184 |         [:<> {:key pronunciation}
185 |          [:dt.pinyin
186 |           (p/digits->diacritics pronunciation)]
187 |          ;; TODO: resolve relevant and other during save step instead
188 |          (->> (concat (for [definition (sort relevant-defs)]
189 |                         ;; Relevant definitions are emphasised and listed first.
190 |                         [:em (render definition)])
191 |                       (for [definition (sort other-defs)]
192 |                         (render definition)))
193 |               (interpose " / ")
194 |               (into [:dd.understated]))]))))
195 |
196 | (defn- search-result-entry
197 |   "Entry in a results-list; nil when none of its uses produce any output."
198 |   [script search-term {term :term
199 |                        uses :uses}]
200 |   ;; `for` hands back a lazy seq, which is truthy even when empty or all-nil,
201 |   ;; so the nils are removed and seq is applied for when-let to skip such entries.
202 |   (when-let [entry-uses (seq (remove nil? (result-entry-uses script search-term term uses)))]
203 |     [:article {:key term}
204 |      [:a {:href (str "/" (name :terms) "/" term)}
205 |       [:h1 {:lang (vc/zh script)} term]
206 |       [:dl entry-uses]]]))
207 |
208 | (defn search-results
209 |   "List of search result entries."
210 |   []
211 |   (let [script        @(rf/subscribe [::subs/script])
212 |         content       @(rf/subscribe [::subs/content])
213 |         result-filter @(rf/subscribe [::subs/current-result-filter])
214 |         ;; A search term is only passed along for English results.
215 |         search-term   (when (= :english result-filter)
216 |                         @(rf/subscribe [::subs/current-id]))]
217 |     (when-let [entries (get content result-filter)]
218 |       (->> entries
219 |            (filter (fn [{:keys [scripts]}] (contains? scripts script)))
220 |            (map #(search-result-entry script search-term %))
221 |            (conj [:main#entries])))))
222 |
223 | (defn unknown-term
224 |   "Apology page for a term without a dictionary entry; more specific than a 404."
225 |   [term]
226 |   (let [message [:p "the dictionary currently doesn't contain an entry for " term "."]]
227 |     [:main
228 |      [:article.full
229 |       [:h1 "Sorry,"] message]]))
230 |
231 | (defn dictionary-page
232 |   "A dictionary page can be 1 of 3 types: entry, search result, or unknown."
233 |   []
234 |   (let [content         @(rf/subscribe [::subs/content])
235 |         unknown-queries @(rf/subscribe [::subs/unknown-queries])
236 |         search-term     @(rf/subscribe [::subs/current-id])]
237 |     (if (contains? unknown-queries search-term)
238 |       [unknown-term search-term]
239 |       ;; Content with :uses is a full entry, anything else a list of results.
240 |       (if (:uses content) [entry] [search-results]))))
241 |
--------------------------------------------------------------------------------
/test/sinostudy/pinyin/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns sinostudy.pinyin.core-test
2 | (:require [clojure.test :refer :all]
3 | [sinostudy.pinyin.core :refer :all]))
4 |
5 | (deftest test-umlaut
6 |   (testing "umlaut"
7 |     (is (= "ÜüÜü" (with-umlaut "VvÜü")))))
8 |
9 | ;; Only single characters are covered for now!
10 | (deftest test-diacritic
11 |   (testing "diacritic"
12 |     (testing "added to characters?"
13 |       (are [expected ch tone] (= expected (with-diacritic ch tone))
14 |         \a \a 0
15 |         \ā \a 1
16 |         \á \a 2
17 |         \ǎ \a 3
18 |         \à \a 4
19 |         \a \a 5
20 |         \A \A 0
21 |         \Ā \A 1
22 |         \Á \A 2
23 |         \Ǎ \A 3
24 |         \À \A 4
25 |         \A \A 5))
26 |     (testing "tone out of range?"
27 |       (is (thrown? IndexOutOfBoundsException (with-diacritic \a 6))))
28 |     (testing "string instead of char?"
29 |       (is (nil? (with-diacritic "a" 1))))))
30 |
31 | (deftest test-diacritic-index
32 |   (testing "diacritic-index"
33 |     (testing "a-rule"
34 |       (is (= (diacritic-index "ao1") 0))
35 |       (is (= (diacritic-index "lang4") 1))
36 |       (is (= (diacritic-index "quan") 2)))
37 |     (testing "e-rule"
38 |       (is (= (diacritic-index "eng") 0))
39 |       (is (= (diacritic-index "heng1") 1))
40 |       (is (= (diacritic-index "zheng") 2)))
41 |     (testing "ou-rule"
42 |       (is (= (diacritic-index "ou") 0))
43 |       (is (= (diacritic-index "tou2") 1))
44 |       (is (= (diacritic-index "zhou") 2)))
45 |     (testing "general rule"
46 |       (is (= (diacritic-index "e") 0))
47 |       (is (= (diacritic-index "eng") 0))
48 |       (is (= (diacritic-index "long2") 1))
49 |       (is (= (diacritic-index "lan") 1))
50 |       (is (= (diacritic-index "kuo4") 2)))
51 |     (testing "mixed case"
52 |       (is (= (diacritic-index "WANG") 1))
53 |       (is (= (diacritic-index "lI0") 1))
54 |       (is (= (diacritic-index "Qu4") 1)))
55 |     (testing "nil input (throws)"
56 |       (is (thrown? NullPointerException (diacritic-index nil))))
57 |     (testing "undefined cases (returns nil)"
58 |       (is (nil? (diacritic-index "")))
59 |       (is (nil? (diacritic-index "4")))
60 |       (is (nil? (diacritic-index [1 2 3])))
61 |       (is (nil? (diacritic-index {:foo :bar}))))))
62 |
63 | (deftest test-digit->diacritic
64 |   (testing "digit->diacritic"
65 |     (testing "converts properly?"
66 |       (is (= "lǒng" (digit->diacritic "long3")))
67 |       (is (= "ér" (digit->diacritic "er2"))))
68 |     (testing "exceptions"
69 |       (is (thrown? NumberFormatException (digit->diacritic "long")))
70 |       (is (thrown? ClassCastException (digit->diacritic [1 2 3]))))))
71 |
72 | (deftest test-digits->diacritics
73 |   (testing "digits->diacritics"
74 |     (testing "converts properly?"
75 |       (is (= "nǐhǎo, nǐ shì shéi?" (digits->diacritics "ni3hao3, ni3 shi4 shei2?")))
76 |       (is (= "long" (digits->diacritics "long")))
77 |       (is (= "" (digits->diacritics ""))))
78 |     (testing "non-strings"
79 |       (are [x] (= x (digits->diacritics x))
80 |         []
81 |         [1 2 3]
82 |         0 \a))))
83 |
--------------------------------------------------------------------------------