├── .gitignore ├── obsolete-shell-aliases.lisp ├── fare-scripts.asd ├── make-multi.sh ├── repl.lisp ├── git.lisp ├── vicode-test.lisp ├── commands.lisp ├── typographie.lisp ├── bazel.lisp ├── gutenberg.lisp ├── unmime.lisp ├── toggle-touchpad.lisp ├── rescript.lisp ├── README.md ├── xrandr.lisp ├── shell-aliases.lisp ├── network.lisp ├── random.lisp ├── languages.lisp ├── viet-practice.lisp ├── edgar.lisp └── vicode.lisp /.gitignore: -------------------------------------------------------------------------------- 1 | *.fasl 2 | *.*fsl 3 | -------------------------------------------------------------------------------- /obsolete-shell-aliases.lisp: -------------------------------------------------------------------------------- 1 | ;; Slowly migrating my zsh aliases here... 2 | 3 | (uiop:define-package :fare-scripts/obsolete-shell-aliases 4 | (:use :cl :fare-utils :uiop :inferior-shell :optima :optima.ppcre :cl-launch/dispatch :cl-scripting)) 5 | 6 | (in-package :fare-scripts/obsolete-shell-aliases) 7 | 8 | (exporting-definitions 9 | 10 | (defun tcdr (&rest args) (run/i `(cdrecord -v dev=4,0,0 speed=32 ,@args))) 11 | (defun tdao (cmd &rest args) (run/i `(cdrdao ,cmd --device 4,0,0 --driver generic-mmc --speed 32 ,@args))) 12 | );exporting-definitions 13 | 14 | (register-commands :fare-scripts/obsolete-shell-aliases) 15 | -------------------------------------------------------------------------------- /fare-scripts.asd: -------------------------------------------------------------------------------- 1 | #-asdf3.1 (error "ASDF 3.1 or bust!") 2 | 3 | (defsystem "fare-scripts" 4 | :version "1.0.0" ;; Beware: not systematically incremented 5 | :description "Various small programs that I write in CL in lieu of shell scripts" 6 | :license "MIT" ;; also BSD or bugroff 7 | :author "Francois-Rene Rideau" 8 | :class :package-inferred-system 9 | :depends-on ((:version "cl-scripting" "0.1") 10 | (:version "inferior-shell" "2.0.3.3") 11 | (:version "fare-utils" "1.0.0.5") 12 | "fare-scripts/bazel" 13 | "fare-scripts/git" 14 | "fare-scripts/languages" 15 | "fare-scripts/network" 16 | "fare-scripts/random" 17 | "fare-scripts/repl" 18 | "fare-scripts/rescript" 19 | "fare-scripts/shell-aliases" 20 | "fare-scripts/toggle-touchpad" 21 | "fare-scripts/typographie" 22 | "fare-scripts/unmime" 23 | "fare-scripts/vicode" 24 | "fare-scripts/xrandr")) 25 | -------------------------------------------------------------------------------- /make-multi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh -f 2 | set -eu 3 | 4 | has_dll () { 5 | [[ -n "$(ldconfig -p 2> /dev/null | grep -F ${1}.so)" ]] 6 | } 7 | 8 | if has_dll libasound && has_dll libvorbis ; then 9 | workout_timer=(--dispatch-system workout-timer) 10 | else 11 | workout_timer= 12 | fi 13 | 14 | MULTI=${BINDIR}/${BINARCH}/fare-scripts 15 | 16 | A=( 17 | cl-launch 18 | --output ${MULTI} --dump ! 19 | --lisp sbcl 20 | --quicklisp 21 | --dispatch-system exscribe --system exscribe/typeset # add-on to exscribe 22 | --dispatch-system fare-scripts/typographie 23 | --dispatch-system fare-scripts/toggle-touchpad 24 | --dispatch-system fare-scripts/unmime 25 | --dispatch-system tthsum 26 | ${workout_timer} 27 | --system fare-scripts # Many of its subsystems register their own functions! 28 | --system-package lisp-stripper --dispatch-entry lispwc 29 | --eval "(map () 'asdf::register-immutable-system (remove \"cl-launch-program\" (asdf::registered-systems) :test 'equal))" 30 | --eval "(uiop:println \"foo\")" 31 | ) 32 | $A $@ 33 | ${MULTI} fare-scripts-symlinks 34 | ${MULTI} help 35 | echo ${MULTI} 36 | -------------------------------------------------------------------------------- /repl.lisp: -------------------------------------------------------------------------------- 1 | ;;; REPL utilities 2 | (uiop:define-package :fare-scripts/repl 3 | (:use :cl :fare-utils :uiop :inferior-shell :optima :optima.ppcre :cl-scripting :cl-launch/dispatch) 4 | (:import-from :swank) 5 | (:export #:read-eval-print #:rep)) 6 | 7 | (in-package :fare-scripts/repl) 8 | 9 | ;; From http://lispblog.xach.com/post/129215925278/my-new-favorite-slimesbclccl-trick 10 | #+sbcl 11 | (push (lambda (&rest args) (apply #'swank:ed-in-emacs args) t) sb-ext:*ed-functions*) 12 | #+ccl 13 | (setq ccl:*resident-editor-hook* #'swank:ed-in-emacs) 14 | 15 | (exporting-definitions 16 | (defun read-eval-print (s &optional (package :fare-scripts/repl)) 17 | (with-standard-io-syntax 18 | (let ((*package* (find-package (standard-case-symbol-name package)))) 19 | (format t "~W~%" (eval-input s))))) 20 | 21 | (defun rep (a &optional b) 22 | (if b (read-eval-print b a) (read-eval-print a)))) 23 | (register-commands :fare-scripts/repl) 24 | 25 | (eval-when (:compile-toplevel) 26 | (uiop:println "COMPILE-TOPLEVEL")) 27 | (eval-when (:load-toplevel) 28 | (uiop:println "LOAD-TOPLEVEL")) 29 | (eval-when (:execute) 30 | (uiop:println "EXECUTE")) 31 | -------------------------------------------------------------------------------- /git.lisp: -------------------------------------------------------------------------------- 1 | ;; Personal scripts to deal with git 2 | 3 | (uiop:define-package :fare-scripts/git 4 | (:use :cl :fare-utils :uiop 5 | :inferior-shell :cl-scripting :fare-scripts/commands 6 | :optima :optima.ppcre :cl-ppcre 7 | :cl-launch/dispatch) 8 | (:export 9 | #:gb)) 10 | 11 | (in-package :fare-scripts/git) 12 | 13 | (exporting-definitions 14 | 15 | (defun gb (&optional pattern) 16 | (if pattern 17 | (let* ((branches* (run/lines `(git branch))) 18 | (branches (mapcar (lambda (s) (subseq s 2)) branches*)) 19 | (matches (loop :for b :in branches :when (scan pattern b) :collect b))) 20 | (case (length matches) 21 | (0 (fail! "No branch matches ~S. Branches available:~%~{~A~%~}" pattern branches*)) 22 | (1 (let ((branch (first matches))) 23 | (run/i `(git checkout ,branch)) 24 | (success))) 25 | (otherwise 26 | (fail! "Several branches match ~S:~%~{ ~A~%~}" pattern matches)))) 27 | (progn 28 | (run/i `(git branch)) 29 | (success)))) 30 | 31 | );exporting-definitions 32 | 33 | (register-commands :fare-scripts/git) 34 | -------------------------------------------------------------------------------- /vicode-test.lisp: -------------------------------------------------------------------------------- 1 | ;;; -*- mode: Lisp; coding: utf-8 -*- 2 | ;;; Testing vicode 3 | ;;; 4 | (defpackage :fare-scripts/vicode-test 5 | (:use :common-lisp :uiop :fare-scripts/vicode :hu.dwim.stefil) 6 | (:export #:test-suite)) 7 | (in-package :fare-scripts/vicode-test) 8 | 9 | (defsuite* (test-suite 10 | :in root-suite 11 | :documentation "Testing fare-scripts/vicode")) 12 | 13 | (defparameter *strings* 14 | '("a" 15 | "AĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY" 16 | "an" 17 | "an ninh" 18 | "an toàn" 19 | "anh" 20 | "ảnh" 21 | "ánh sáng" 22 | "ă" 23 | "ăn" 24 | "â" "b" "c" "d" "e" "ê" 25 | "f" "g" "h" 26 | "hello world" 27 | "i" "j" "k" "l" "m" 28 | "ma" "mà" "mả" "mã" "má" "mạ" 29 | "n" "o" "ô" "ơ" "p" "q" "r" "s" "t" 30 | "tôi học tiếng Việt" 31 | "u" "ư" "v" "w" "x" "y" "z" 32 | )) 33 | 34 | (deftest sort-test-1 () 35 | (is (equal *strings* (sort (copy-list *strings*) 'string< 36 | :key (lambda (x) (vi-sortable-string (string-downcase x))))))) 37 | 38 | (deftest viqr-test-1 () 39 | (loop :for (viqr unicode) :in 40 | '(("to^i ho.c tie^'ng Vie^.t" "tôi học tiếng Việt") 41 | ("ba` ba be'o ba'n ba'nh be`o ba'nh bo` ba'nh bao bu+`a ba~i be^n bo+` bie^?n bi. ba('t bo? bo't ba bo^'n ba^.n." 42 | "bà ba béo bán bánh bèo bánh bò bánh bao bừa bãi bên bờ biển bị bắt bỏ bót ba bốn bận.")) :do 43 | (is (equal viqr (viqr-from-unicode :input unicode))) 44 | (is (equal unicode (unicode-from-viqr :input viqr))))) 45 | 46 | -------------------------------------------------------------------------------- /commands.lisp: -------------------------------------------------------------------------------- 1 | ;; Slowly migrating my zsh aliases here... 2 | 3 | (uiop:define-package :fare-scripts/commands 4 | (:use :cl :uiop :fare-utils 5 | :inferior-shell :cl-scripting :cl-launch/dispatch) 6 | (:export #:fare-dir #:src-root #:common-lisp-src #:cl-root 7 | #:getuid #:stow-root #:restow 8 | #:fare-scripts-symlinks #:help)) 9 | 10 | (in-package :fare-scripts/commands) 11 | 12 | (exporting-definitions 13 | 14 | (defun fare-dir () (getenv-absolute-directory "FARE")) 15 | (defun src-root () (subpathname (fare-dir) "src/")) 16 | (defun common-lisp-src () (subpathname (src-root) "common-lisp/")) 17 | (defun cl-root () (subpathname (fare-dir) "cl/")) 18 | 19 | (defun getuid () 20 | #+sbcl (sb-posix:getuid) 21 | #-sbcl (error "no getuid")) ;; use iolib? 22 | 23 | (defun stow-root () 24 | (if (zerop (getuid)) 25 | #p"/usr/local/stow/" 26 | (subpathname (fare-dir) "local/stow/"))) 27 | 28 | (defun restow () 29 | (with-current-directory ((stow-root)) 30 | (run `(stow "-R" ,@(mapcar (lambda (x) (car (last (pathname-directory x)))) (subdirectories ".")))) 31 | (run '(symlinks -rd ".."))) 32 | (success)) 33 | 34 | (defun fare-scripts-symlinks () 35 | (let ((binarch (resolve-absolute-location `(,(getenv "BINDIR") ,(getenv "BINARCH")) :ensure-directory t))) 36 | (with-current-directory (binarch) 37 | (dolist (i (cl-launch/dispatch:all-entry-names)) 38 | (unless (file-exists-p i) 39 | (format t "linking file ~A~%" i) 40 | (run `("ln" "-s" "fare-scripts" ,i)))))) 41 | (success)) 42 | (defun help () 43 | (format! t "~A available commands: ~{~A~^ ~}~%" (get-name) (all-entry-names)) 44 | (success)) 45 | );exporting-definitions 46 | 47 | ;; Not all our exported symbols are worth exposing to the shell command-line. 48 | (register-commands :fare-scripts/commands) 49 | -------------------------------------------------------------------------------- /typographie.lisp: -------------------------------------------------------------------------------- 1 | ":" ; exec cl-launch -Q -sm fare-scripts/typographie -- "$@" 2 | 3 | (defpackage :fare-scripts/typographie 4 | (:use :common-lisp :uiop :cl-ppcre) 5 | (:export #:typographie #:ligne)) 6 | 7 | (in-package :fare-scripts/typographie) 8 | 9 | (defparameter *spaces* 10 | ;; space, no-break-space, thin space, narrow no-break space, tab 11 | (map 'string 'code-char '(#x20 #xA0 #x2009 #x202F #x9))) 12 | 13 | (defun space-char (&key thin unbreakable) 14 | (aref *spaces* (+ (if unbreakable 1 0) (if thin 2 0)))) 15 | 16 | (defparameter *any-space* `(:regex ,(strcat "[" *spaces* "]"))) 17 | (defparameter *pre-demi-espace* 18 | (create-scanner 19 | `(:sequence 20 | (:greedy-repetition 0 nil ,*any-space*) 21 | (:regex "([;!?])") 22 | (:greedy-repetition 0 nil ,*any-space*)))) 23 | (defparameter *pre-espace* 24 | (create-scanner 25 | `(:sequence 26 | (:greedy-repetition 0 nil ,*any-space*) 27 | (:regex "([:»])") 28 | (:negative-lookahead "[/,.:;!?]") 29 | (:greedy-repetition 0 nil ,*any-space*)))) 30 | (defparameter *post-espace* 31 | (create-scanner 32 | `(:sequence 33 | (:regex "(«)") 34 | (:greedy-repetition 0 nil ,*any-space*)))) 35 | (defparameter *pas-d-espace* 36 | (create-scanner 37 | `(:alternation 38 | (:sequence 39 | (:greedy-repetition 0 nil ,*any-space*) 40 | (:positive-lookahead (:regex "$|[)\\]}]"))) 41 | (:sequence 42 | (:positive-lookahead (:regex "$|[(\\[{]")) 43 | (:greedy-repetition 0 nil ,*any-space*))))) 44 | 45 | (defun ligne (s) 46 | (let* ((s1 (regex-replace-all *pre-demi-espace* s 47 | (strcat (space-char :thin t :unbreakable t) "\\1 "))) 48 | (s2 (regex-replace-all *pre-espace* s1 49 | (strcat (space-char :unbreakable t) "\\1 "))) 50 | (s3 (regex-replace-all *post-espace* s2 51 | (strcat "\\1" (space-char :unbreakable t))))) 52 | (regex-replace-all *pas-d-espace* s3 ""))) 53 | 54 | (defun typographie (&optional (i *standard-input*) (o *standard-output*)) 55 | (loop for r = (read-line i nil nil) 56 | while r do 57 | (format o "~A~%" (ligne r)))) 58 | 59 | (defun main (argv) 60 | (assert (null argv)) 61 | (typographie)) 62 | -------------------------------------------------------------------------------- /bazel.lisp: -------------------------------------------------------------------------------- 1 | ;; Shell aliases for working with bazel... 2 | 3 | (uiop:define-package :fare-scripts/bazel 4 | (:use :cl :fare-utils :uiop :inferior-shell :optima :optima.ppcre :cl-scripting) 5 | (:export #:*bazel-dir* #:*bazel* #:bazel #:ss #:ngr #:make-bazel 6 | #:*java-home*)) 7 | 8 | (in-package :fare-scripts/bazel) 9 | 10 | (exporting-definitions 11 | 12 | (defparameter *java-home* "/usr/lib/jvm/java-8-openjdk-amd64") 13 | (defparameter *bazel-dir* (subpathname (user-homedir-pathname) "src/google/bazel/")) 14 | (defparameter *bazel* (subpathname *bazel-dir* "output/bazel")) 15 | 16 | (defun make-bazel () 17 | (with-current-directory (*bazel-dir*) 18 | (run/i `(env ("JAVA_HOME=" ,*java-home*) "./compile.sh")) 19 | (values))) 20 | 21 | (defun bazel (&rest args) 22 | (with-current-directory (*bazel-dir*) 23 | (run/i `(env ("JAVA_HOME=" ,*java-home*) ,*bazel* ,@args))) 24 | (success)) 25 | 26 | (defun ss (&optional nobuild) 27 | (with-current-directory (*bazel-dir*) 28 | (unless nobuild 29 | (bazel 'build "src/test/java:skylarkshell")) 30 | (run/i '("bazel-bin/src/test/java/skylarkshell"))) 31 | (success)) 32 | 33 | 34 | (progn 35 | (defun directory-last-name (path) 36 | (let ((n (last (pathname-directory path)))) 37 | (and (consp n) (stringp (car n)) (car n)))) 38 | (defun repo-char-p (char) 39 | (ascii-alphanumeric-or-underscore-p char))) 40 | 41 | (defun ngr (&optional path) 42 | (with-current-directory ((and path (ensure-directory-pathname path))) 43 | (let* ((url 44 | (match (run/ss '(git config --get remote.origin.url)) 45 | ((ppcre "^(ssh:|git:)?(//)?(git@)?(github.com|gitlab.common-lisp.net)[:/](.*)$" 46 | _ _ _ h p) 47 | (strcat "https://" h "/" p)) 48 | (url url))) 49 | (commit (run/ss '(git log -1 "--format=%H"))) 50 | (dirname (directory-last-name (getcwd))) 51 | (repo (strcat "lisp__" (substitute-if-not #\_ #'repo-char-p 52 | (string-downcase dirname))))) 53 | (format t " native.new_git_repository( 54 | name = \"~A\", 55 | commit = \"~A\", 56 | remote = \"~A\", 57 | build_file = base_dir + \"/build_defs/~A.BUILD\" 58 | )~%~%" 59 | repo commit url repo) 60 | (values)))) 61 | );exporting-definitions 62 | 63 | (register-commands :fare-scripts/bazel) 64 | -------------------------------------------------------------------------------- /gutenberg.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Downloading and processing text from Project Gutenberg 2 | (uiop:define-package :fare-scripts/gutenberg 3 | (:use :cl :uiop :fare-utils 4 | :drakma :inferior-shell :cl-scripting :cl-launch/dispatch) 5 | (:export 6 | #:get-gutenberg-urls)) 7 | #:download-gutenberg-files)) 8 | 9 | (in-package :fare-scripts/gutenberg) 10 | 11 | #| ;;; Downloading files from Project Gutenberg 12 | https://www.gutenberg.org/wiki/Gutenberg:Information_About_Robot_Access_to_our_Pages 13 | https://www.exratione.com/2014/11/how-to-politely-download-all-english-language-text-format-files-from-project-gutenberg/ 14 | http://webapps.stackexchange.com/questions/12311/how-to-download-all-english-books-from-gutenberg 15 | |# 16 | (exporting-definitions 17 | 18 | (defun get-gutenberg-urls 19 | (&key 20 | (directory (subpathname (user-homedir-pathname) "gutenberg/")) 21 | (language "en") 22 | (format "txt")) 23 | (let ((catdir (subpathname directory "catalog/"))) 24 | ;; Get a list of all the files we want 25 | (with-current-directory (catdir) 26 | (run/i 27 | `(wget 28 | "-w" 2 "-m" 29 | ("--directory-prefix=" ,catdir) 30 | ("http://www.gutenberg.org/robot/harvest?" 31 | ,@(when format `("filetypes[]=" ,format "&")) 32 | ,@(when language `("langs[]=" ,language))))) 33 | ;; Process the downloaded HTML link lists into 34 | ;; a single sorted file of zipfile URLs, one per line. 35 | (prog1 36 | (run 37 | `(pipe 38 | (grep "-oh" "http://[a-zA-Z0-9./]*.zip" 39 | ,@(directory (subpathname catdir "www.gutenberg.org/robot/"))) 40 | (sort -u)) 41 | :output :lines) 42 | ;; Get rid of the downloaded harvest files now that we have what we want. 43 | #+(or) 44 | (uiop:delete-directory-tree 45 | (subpathname catdir "www.gutenberg.org/") 46 | :validate (lambda (x) (search "/catalog/www.gutenberg.org/" (namestring x)))))))) 47 | 48 | (defun download-all-urls (urls &key directory) 49 | (when directory (ensure-directory-exists directory)) 50 | (with-current-directory (directory) 51 | ;; XXXXXX 52 | )) 53 | 54 | (defun download-gutenberg-files 55 | (&key 56 | (directory (subpathname (user-homedir-pathname) "gutenberg/")) 57 | (language "en") 58 | (format "txt")) 59 | (let ((catdir (subpathname directory "catalog/"))) 60 | )) 61 | 62 | );exporting-definitions 63 | 64 | (register-commands :fare-scripts/gutenberg) 65 | -------------------------------------------------------------------------------- /unmime.lisp: -------------------------------------------------------------------------------- 1 | ":" ; exec cl-launch -Q -sm fare-scripts/unmime -- "$@" 2 | 3 | (uiop:define-package :fare-scripts/unmime 4 | (:mix :cl :cl-mime :uiop :optima :optima.ppcre :babel)) 5 | 6 | (in-package :fare-scripts/unmime) 7 | 8 | (defun show-usage (&optional (stream *standard-output*)) 9 | (format stream "usage: unmime | unmime - | unmime ~%~ 10 | decodes the specified file (default: stdin) as a single-file mime container~%")) 11 | 12 | (defun get-parsed-mime (x) 13 | (etypecase x 14 | ((or stream string) (cl-mime:parse-mime x)) 15 | (pathname (with-input-file (s x) (cl-mime:parse-mime s))))) 16 | 17 | (defun decode-text-mime (tm) 18 | (let* ((c (coerce (decode-content tm) '(vector (unsigned-byte 8) *))) 19 | (cs (first (charset tm))) 20 | (e (babel-encodings:get-character-encoding 21 | (and cs (find-symbol (string-upcase cs) :keyword))))) 22 | (octets-to-string c :encoding e))) 23 | 24 | (defun get-mime-string (x) 25 | (let* ((m (get-parsed-mime x)) 26 | (tm (etypecase m 27 | (text-mime m) 28 | (multipart-mime (let ((f (first (content m)))) 29 | (check-type f text-mime) 30 | f))))) 31 | (decode-text-mime tm))) 32 | 33 | (defun unmime (x) (princ (get-mime-string x))) 34 | 35 | (defun unmimeall (input output-name) 36 | (let ((m (get-parsed-mime input))) 37 | (unmime-to m output-name))) 38 | 39 | (defun unmime-to (m output-name) 40 | (etypecase m 41 | (text-mime 42 | (format! t "Creating text file ~A~%" output-name) 43 | (with-output-file (o output-name :if-exists :rename-and-delete) 44 | (princ (decode-text-mime m) o))) 45 | (multipart-mime 46 | (loop 47 | :for i :from 0 48 | :for n :in (content m) 49 | :do (unmime-to n (format nil "~A.~D" output-name i)))) 50 | (mime 51 | (format! t "Creating binary file ~A~%" output-name) 52 | (with-output-file (o output-name :element-type '(unsigned-byte 8) :if-exists :rename-and-delete) 53 | (write-sequence (decode-content m) o))))) 54 | 55 | (defun main (argv) 56 | (match argv 57 | (() (unmime *standard-input*)) 58 | ((list x) 59 | (match x 60 | ("-" (unmime *standard-input*)) 61 | ((or "-h" "--help" "-?") (show-usage *standard-output*)) 62 | ((ppcre "^-") (show-usage *error-output*)) 63 | (_ (unmime (parse-native-namestring x))))) 64 | ((list in out) 65 | (unmimeall 66 | (if (equal in "-") *standard-input* (parse-native-namestring in)) 67 | out)) 68 | (_ (show-usage *error-output*)))) 69 | -------------------------------------------------------------------------------- /toggle-touchpad.lisp: -------------------------------------------------------------------------------- 1 | ":" ; exec cl-launch -Q -sm fare-scripts/toggle-touchpad "$0" "$@" 2 | ;; -*- lisp -*- 3 | ;; Based on https://wiki.archlinux.org/index.php/Touchpad_Synaptics#Software_toggle 4 | ;; Use the UI preferences to add a keyboard shortcut that invokes this script. 5 | ;; To avoid the slow startup time of lisp as a script, better dump an image with: 6 | ;; cl-launch -o ~/bin/x64/toggle-touchpad -d ! -l clisp \ 7 | ;; -s optima.ppcre -s inferior-shell -E toggle-touchpad::main -L toggle-touchpad.lisp 8 | ;; Or use make-multi.sh to create a multi-call binary that includes toggle-touchpad support. 9 | 10 | (uiop:define-package :fare-scripts/toggle-touchpad 11 | (:use :cl :fare-utils :uiop :inferior-shell 12 | :optima :optima.ppcre :cl-scripting) 13 | (:export #:help #:get-touchpad-id #:device-enabled-p 14 | #:toggle-device #:disable-device #:enable-device)) 15 | 16 | (in-package :fare-scripts/toggle-touchpad) 17 | 18 | (defun get-touchpad-id () 19 | (dolist (line (run/lines '(xinput list))) 20 | (match line 21 | ((ppcre "(TouchPad|\\sSYNA.*|Synaptics\\s.*|SynPS/2 Synaptics TouchPad)\\s+id\=([0-9]{1,2})\\s+" _ x) 22 | (return (values (parse-integer x))))))) 23 | 24 | (defun device-enabled-p (&optional (id (get-touchpad-id))) 25 | (dolist (line (run/lines `(xinput list-props ,id))) 26 | (match line 27 | ((ppcre "Device Enabled\\s+[():0-9]+\\s+([01])" x) (return (equal x "1")))))) 28 | 29 | (defun toggle-device (&optional (id (get-touchpad-id)) (on :toggle)) 30 | (let ((state (ecase on 31 | ((:toggle) (not (device-enabled-p id))) 32 | ((nil t) on)))) 33 | (run `(xinput ,(if state 'enable 'disable) ,id))) 34 | (success)) 35 | 36 | (defun enable-device (&optional (id (get-touchpad-id))) 37 | (toggle-device id t)) 38 | 39 | (defun disable-device (&optional (id (get-touchpad-id))) 40 | (toggle-device id nil)) 41 | 42 | (defun help (&optional (output *standard-output*)) 43 | (format output "toggle-touchpad functions: ~{~(~A~)~^ ~}~%" 44 | (package-functions :fare-scripts/toggle-touchpad)) 45 | (success)) 46 | 47 | (defun main (argv) ;; TODO: use command-line-arguments, or CLON 48 | (cond 49 | ((null argv) (toggle-device)) 50 | ((eql (first-char (first argv)) #\() (eval (first argv))) 51 | (t (if-let (fun (package-function :fare-scripts/toggle-touchpad 52 | (standard-case-symbol-name (first argv)))) 53 | (apply 'run-command fun (rest argv)) 54 | (progn 55 | (format *error-output* "Bad toggle-touchpad command: ~A~%" (first argv)) 56 | (help *error-output*) 57 | (quit 2)))))) 58 | -------------------------------------------------------------------------------- /rescript.lisp: -------------------------------------------------------------------------------- 1 | ;;; -*- mode: Lisp; coding: utf-8 -*- 2 | ;; ᴸᴀᵀᴇᵡ 3 | 4 | (defpackage :fare-scripts/rescript 5 | (:use :common-lisp :uiop) 6 | (:export #:superscriptize #:subscriptize #:upsidedown #:leftright #:mathbb #:smallcaps #:fbchange)) 7 | 8 | (in-package :fare-scripts/rescript) 9 | 10 | (defun make-script-table (original translated &key reversible) 11 | (loop :with h = (make-hash-table :test 'equal) 12 | :for x :across original 13 | :for y :across translated 14 | :do (setf (gethash x h) y) 15 | (when reversible 16 | (setf (gethash y h) x)) 17 | :finally (return h))) 18 | 19 | (defmacro define-script-translation (name original translated 20 | &key reversible nest) 21 | (let ((table (intern (format nil "*~A-~A*" name 'table))) 22 | (process-char (intern (format nil "~A-~A" name 'character)))) 23 | `(progn 24 | (defparameter ,table 25 | (make-script-table ,original ,translated :reversible ,reversible)) 26 | (defun ,process-char (c) 27 | (or (gethash c ,table) 28 | (error "Cannot ~S ~S" ',process-char c))) 29 | (defun ,name (s) 30 | (nest 31 | ,@(when nest (list nest)) 32 | (map 'string ',process-char s)))))) 33 | 34 | (define-script-translation superscriptize 35 | " 0123456789+-=()abcdefghijklmnoprstuvwxyzABDEGHIJKLMNOPRTUVWαβγδεθιΦφχ" 36 | " ⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖʳˢᵗᵘᵛʷˣʸᶻᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁⱽᵂᵅᵝᵞᵟᵋᶿᶥᶲᵠᵡ") 37 | 38 | (define-script-translation subscriptize 39 | " 0123456789+-=()aehijklmnoprstuvxβγρφχəا" 40 | " ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓᵦᵧᵨᵩᵪₔٖ") 41 | 42 | (define-script-translation upsidedown 43 | " zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA0987654321&_?!\"'.,;" 44 | " zʎxʍʌnʇsɹbdouɯlʞɾıɥɓɟǝpɔqɐZ⅄XMΛ∩⊥SᴚԾԀONW⅂⋊ſIH⅁ℲƎᗡƆ𐐒∀068ㄥ9ގㄣƐᄅ⇂⅋‾¿¡„,˙'؛" 45 | :reversible t :nest (reverse)) 46 | 47 | (define-script-translation leftright 48 | " 018!\"'.:-_+=|()[]{}<>/\\´`ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" 49 | " 018!\"'.:-_+=|)(][}{><\\/`´ᗅᗺƆᗡƎꟻᎮHIႱ⋊⅃MͶOꟼỌЯꙄTUVWXYƸɒdɔbɘᎸǫʜiꞁʞ|mᴎoqpɿꙅƚuvwxʏƹ" 50 | :reversible t :nest (reverse)) 51 | 52 | #-allegro ;; Allegro gets confused, possibly because of codepoints > 65535 ? 53 | (define-script-translation mathbb 54 | " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" 55 | " 𝔸𝔹ℂ𝔻𝔼𝔽𝔾ℍ𝕀𝕁𝕂𝕃𝕄ℕ𝕆ℙℚℝ𝕊𝕋𝕌𝕍𝕎𝕏𝕐ℤ𝕒𝕓𝕔𝕕𝕖𝕗𝕘𝕙𝕚𝕛𝕜𝕝𝕞𝕟𝕠𝕡𝕢𝕣𝕤𝕥𝕦𝕧𝕨𝕩𝕪𝕫𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡") 56 | 57 | (define-script-translation smallcaps 58 | " ABCDEFGHIJKLMNOPRSTUVWYZ" 59 | " ᴀʙᴄᴅᴇꜰɢʜɪᴊᴋʟᴍɴᴏᴘʀsᴛᴜᴠᴡʏᴢ") 60 | 61 | (define-script-translation fbchange 62 | " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789&_?!\"'.,;" ; nah: ԁг 63 | " аbсdеfghіјklmոοрԛrѕtυѵԝхуzАВСDЕFGHӀЈКLМNОРԚRЅТUѴԜXҮZ0123456789&_?!\"'.,;") ; асеіјոοрԛυѵѕԝхуAВСЕКӀЈМОРԚЅТѴԜХҮ 64 | 65 | (defun search-char-name (subname) 66 | (loop 67 | :for i :from 0 :below char-code-limit 68 | :for c = (ignore-errors (code-char i)) 69 | :for n = (and c (char-name c)) 70 | :when (and n (search subname n)) 71 | :do (format t "~D ~C ~A~%" i c n))) 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | fare-scripts, random personal scripts 2 | ===================================== 3 | 4 | fare-scripts is a collection of small scripts I use at home. 5 | These scripts are published as examples of how to write scripts in Common Lisp, 6 | but without the ambition of turning them into widely used programs as such. 7 | 8 | Any general purpose utilities will be moved from out of this repository 9 | into the [cl-scripting](http://github.com/fare/cl-scripting) project. 10 | 11 | NB: If you want to try these scripts, be sure to install this directory 12 | where ASDF 3.1 will find it, e.g. under `~/common-lisp/` or, 13 | if you use [Quicklisp](https://www.quicklisp.org/beta/), under `~/quicklisp/local-projects/`. 14 | Be sure to install all the dependencies, and/or to use Quicklisp. 15 | 16 | Contents: 17 | 18 | * [fare-scripts.asd](fare-scripts.asd): the `.asd` file, 19 | which trivially uses package-inferred-system. 20 | 21 | * [make-multi.sh](make-multi.sh): a shell script to create a multicall binary 22 | that contains all the CL scripts I need in a single image, to combine 23 | fast startup with memory savings. See 24 | 25 | * [bazel.lisp](bazel.lisp): trivial functions to work with [bazel](http://bazel.io) 26 | 27 | * [commands.lisp](commands.lisp): functions to manage commands in my PATH. 28 | 29 | * [edgar.lisp](edgar.lisp): accessing the EDGAR database from the SEC.gov. 30 | 31 | * [git.lisp](git.lisp): some utility for git. 32 | 33 | * [gutenberg.lisp](gutenberg.lisp): downloading and processing text from Project Gutenberg. 34 | 35 | * [languages.lisp](languages.lisp): scripts to bootstrap various programming languages my way. 36 | 37 | * [network.lisp](network.lisp): wrappers for network-manager 38 | 39 | * [obsolete-shell-aliases.lisp](obsolete-shell-aliases.lisp): functions I don't use any more, 40 | kept around for archival purposes after conversion from shell to Lisp. 41 | 42 | * [random.lisp](random.lisp): functions to randomly generate various passphrases, etc. 43 | 44 | * [repl.lisp](repl.lisp): various helpers for the Lisp REPL. 45 | 46 | * [rescript.lisp](rescript.lisp): various silly script filters. 47 | 48 | * [shell-aliases.lisp](shell-aliases.lisp): various functions that used to be 49 | shell aliases and are not better written in Lisp. 50 | 51 | * [toggle-touchpad.lisp](toggle-touchpad.lisp): a utility to toggle the touchpad on a laptop 52 | using X-Window. 53 | 54 | * [typographie.lisp](typographie.lisp): a filter so my html file abides by French 55 | typographic standards, using cl-ppcre for regexp replacement (NB: assumes UTF-8). 56 | 57 | * [unmime.lisp](unmime.lisp): filter that's semi-useful when processing 58 | mime files as e.g. decrypted from PGP encrypted mail. 59 | 60 | * [vicode.lisp](vicode.lisp): deal with various vietnamese encodings 61 | 62 | * [vicode-test.lisp](vicode-test.lisp): mini test-suite for vicode 63 | 64 | * [viet-practice.lisp](viet-practice.lisp): memorization exercises for Vietnamese, courtesy of Robert Strandh 65 | 66 | * [xrandr.lisp](xrandr.lisp): wrappers around xrandr, notably for dealing with screen orientation. 67 | -------------------------------------------------------------------------------- /xrandr.lisp: -------------------------------------------------------------------------------- 1 | (uiop:define-package :fare-scripts/xrandr 2 | (:use :cl :uiop :fare-utils 3 | :optima :optima.ppcre 4 | :inferior-shell :cl-scripting :cl-launch/dispatch) 5 | (:export #:screen-device-up #:screen-device-right #:screen-device-down #:screen-device-left)) 6 | 7 | (in-package :fare-scripts/xrandr) 8 | 9 | ;; TODO: write a real parser for xrandr output? 10 | 11 | (defun current-device () "eDP-1") 12 | 13 | (defun xinput-device-properties (device-id) 14 | (loop :for line :in (cdr (run/lines `(xinput list-props ,device-id))) :collect 15 | (match line 16 | ((ppcre "^\\s+([A-Za-z-0-9][A-Za-z0-9 ]*[A-Za-z-0-9]) [(]([0-9]+)[)]:\\s+(.*)$" 17 | name id value) 18 | (list name (parse-integer id) value)) 19 | (_ (error "Cannot parse device property line ~A" line))))) 20 | 21 | (defun touchscreen-devices () 22 | (while-collecting (c) 23 | (dolist (line (run/lines '(xinput list))) 24 | (match line 25 | ((ppcre "(ELAN21EF:00 04F3:[0-9A-F]{4}|TPPS/2 IBM TrackPoint|SynPS/2 Synaptics TouchPad|Wacom Co.,Ltd. Pen and multitouch sensor (Pen.*|Finger))\\s+id\=([0-9]{1,2})\\s+" _ _ x) 26 | (c (parse-integer x))))))) 27 | 28 | (defun configure-touchscreen (&key invert-x invert-y swap-xy matrix) 29 | "Configure all builtin pointer devices to follow the given orientation. 30 | INVERT-X, INVERT-Y and SWAP-XY specify how to configure the devices with the Evdev mechanism; 31 | MATRIX specifies how to configure the devices with the Coordinate Transformation Matrix mechanism." 32 | (dolist (ts (touchscreen-devices)) 33 | (if-let (properties (ignore-errors (xinput-device-properties ts))) 34 | (flet ((property-id (name) (second (find name properties :key 'first :test 'equal)))) 35 | (if-let (c-t-m (property-id "Coordinate Transformation Matrix")) 36 | (run/i `(xinput set-prop ,ts ,c-t-m ,@matrix) :on-error nil) 37 | (if-let (axis-inversion (property-id "Evdev Axis Inversion")) 38 | (if-let (axes-swap (property-id "Evdev Axes Swap")) 39 | (progn 40 | (run/i `(xinput set-prop ,ts ,axis-inversion ,(if invert-x 1 0) ,(if invert-y 1 0))) 41 | (run/i `(xinput set-prop ,ts ,axes-swap ,(if swap-xy 1 0))))))))))) 42 | 43 | (exporting-definitions 44 | 45 | (defun screen-device-up (&optional (device (current-device))) 46 | (run/i `(xrandr --output ,device --rotate normal)) 47 | (configure-touchscreen :invert-x nil :invert-y nil :swap-xy nil :matrix '(1 0 0 0 1 0 0 0 1))) 48 | (defun screen-device-right (&optional (device (current-device))) 49 | (run/i `(xrandr --output ,device --rotate right)) 50 | (configure-touchscreen :invert-x nil :invert-y t :swap-xy t :matrix '(0 1 0 -1 0 1 0 0 1))) 51 | (defun screen-device-down (&optional (device (current-device))) 52 | (run/i `(xrandr --output ,device --rotate inverted)) 53 | (configure-touchscreen :invert-x t :invert-y t :swap-xy nil :matrix '(-1 0 1 0 -1 1 0 0 1))) 54 | (defun screen-device-left (&optional (device (current-device))) 55 | (run/i `(xrandr --output ,device --rotate left)) 56 | (configure-touchscreen :invert-x t :invert-y nil :swap-xy t :matrix '(0 -1 1 1 0 0 0 0 1))) 57 | 58 | );exporting-definitions 59 | 60 | ;; Not all our exported symbols are worth exposing to the shell command-line. 61 | (register-commands :fare-scripts/xrandr) 62 | -------------------------------------------------------------------------------- /shell-aliases.lisp: -------------------------------------------------------------------------------- 1 | ;; Slowly migrating my zsh aliases here... 2 | 3 | (uiop:define-package :fare-scripts/shell-aliases 4 | (:use :cl :fare-utils :uiop 5 | :inferior-shell :cl-scripting :fare-scripts/commands 6 | :optima :optima.ppcre 7 | :cl-launch/dispatch) 8 | (:export 9 | #:*char-mode* 10 | #:*colon-mode* 11 | #:*normal-mode* 12 | #:*num-mode* 13 | #:ascii 14 | #:batt #:battery-status 15 | #:continue-chrome 16 | #:display-ascii-hex-table 17 | #:display-ascii-oct-table 18 | #:kde-panel 19 | #:kill-chrome 20 | #:rot13 21 | #:snd-jack 22 | #:snd-jackd 23 | #:snd-nojack 24 | #:snd-pulse 25 | #:stop-chrome 26 | #:xrsync)) 27 | 28 | (in-package :fare-scripts/shell-aliases) 29 | 30 | (defun char-display-char (c) 31 | (if (or (member c '(127 155)) 32 | (< c 32) 33 | (<= 128 c 159)) 34 | #\space 35 | (code-char c))) 36 | 37 | (defvar *num-mode* "") 38 | (defvar *colon-mode* "") 39 | (defvar *char-mode* "") 40 | (defvar *normal-mode* "") 41 | 42 | (exporting-definitions 43 | 44 | (defun display-ascii-hex-table () 45 | (loop for i from 32 to 255 46 | do (format t "~A~X~A:~A~A~A~:[ ~;~%~]" 47 | *num-mode* i 48 | *colon-mode* *char-mode* 49 | (char-display-char i) 50 | *normal-mode* 51 | (zerop (mod (1+ i) 16)))) 52 | (success)) 53 | 54 | (defun ascii () (display-ascii-hex-table)) 55 | 56 | (defun display-ascii-oct-table () 57 | (loop for i from 32 to 255 58 | do (format t "~A~3O~A~A~A~:[ ~;~%~]" 59 | *num-mode* i 60 | *char-mode* 61 | (char-display-char i) 62 | *normal-mode* 63 | (zerop (mod (1+ i) 16)))) 64 | (success)) 65 | 66 | (defun rot13 () 67 | (run/i '(tr "[a-zA-Z]" "[n-za-mN-ZA-M]")) 68 | (success)) 69 | 70 | (defun xrsync (args) 71 | (run/i `(rsync "-rlptgoDHSx" ,@args))) 72 | 73 | (defun snd-jack () 74 | (run/i `(pasuspender -- jack_control start))) 75 | 76 | (defun snd-jackd () ;; another way to start... 77 | (run/i `(pasuspender -- jackd "-R" "-P4" -dalsa -r44100 -p512 -n4 "-D" "-Chw:PCH" "-Phw:PCH"))) 78 | 79 | (defun snd-pulse () 80 | (run/i `(jack_control exit) :on-error nil)) 81 | 82 | (defun snd-nojack () 83 | (run/i `(killall jackd) :on-error nil)) 84 | 85 | (defun kill-chrome (&rest args) 86 | (inferior-shell:run 87 | `(killall ,@args chromium-browser chromium google-chrome chrome) 88 | :output :interactive :input :interactive :error-output nil :on-error nil)) 89 | 90 | (defun stop-chrome () 91 | (kill-chrome "-STOP")) 92 | 93 | (defun continue-chrome () 94 | (kill-chrome "-CONT")) 95 | 96 | (defun kde-panel () 97 | (run/i `(kquitapp plasmashell)) 98 | (run `(setsid plasmashell 99 | (> ,(subpathname (temporary-directory) "plasmashell.out")) (>& 2 1)))) 100 | 101 | (defun battery-status (&optional out) 102 | (with-output (out) 103 | (loop :for dir :in (uiop:directory* #p"/sys/class/power_supply/*/") 104 | :for battery = (first (last (pathname-directory dir))) 105 | :for capacity = (ignore-errors (read-file-line (subpathname dir "capacity"))) 106 | :for status = (ignore-errors (read-file-line (subpathname dir "status"))) 107 | :when (and capacity status) :do 108 | (format out "~A: ~A% (~A)~%" battery capacity status)))) 109 | 110 | (defun batt () 111 | (battery-status t) 112 | (values)) 113 | 114 | );exporting-definitions 115 | 116 | 117 | (register-commands :fare-scripts/shell-aliases) 118 | -------------------------------------------------------------------------------- /network.lisp: -------------------------------------------------------------------------------- 1 | ;;; REPL utilities 2 | (uiop:define-package :fare-scripts/network 3 | (:use :cl :fare-utils :uiop :inferior-shell :optima :optima.ppcre :cl-scripting) 4 | (:export 5 | #:get-wireless-connections #:get-wireless-passphrase 6 | #:nmup #:nmauto #:nowifi)) 7 | 8 | (in-package :fare-scripts/network) 9 | 10 | ;;; The WIRELESS_SECRETS environment variable should point to a file where 11 | ;;; some lines of the form "ESSID: your ssid" are followed by 12 | ;;; a line "passphrase: your passphrase", in order of network preference. 13 | ;;; 14 | ;;; Q: should it default to ~/.secrets/wireless.text ??? 15 | (defvar *wireless-secrets* nil) 16 | (defun init-wireless-secrets () 17 | (setf *wireless-secrets* (getenv-pathname "WIRELESS_SECRETS"))) 18 | (register-image-restore-hook 'init-wireless-secrets t) 19 | 20 | (defun get-wireless-secrets () 21 | (or *wireless-secrets* (error "WIRELESS_SECRETS variable not defined"))) 22 | 23 | (defun extract-fields (field-lengths line) 24 | (loop :with len = (length line) 25 | :for (name start end) :in field-lengths :collect 26 | (progn 27 | name ;; ignore 28 | (string-right-trim " " (subseq line start (min end len)))))) 29 | 30 | (defun extract-field-lengths (fields) 31 | (loop :with start = 0 :with len = (length fields) 32 | :while (< start len) :collect 33 | (let* ((name-end (position #\space fields :start start)) 34 | (name (subseq fields start name-end)) 35 | (end (or (when name-end (position #\space fields :start name-end :test-not #'eql)) len))) 36 | (prog1 37 | (list name start end) 38 | (setf start end))))) 39 | 40 | (defun parse-nmcli-list-line (field-lengths) 41 | (lambda (line) 42 | (destructuring-bind (connected ssid mode chan rate signal bars security) 43 | (extract-fields field-lengths line) 44 | (declare (ignore bars)) 45 | (list (equal connected "*") 46 | ssid 47 | mode 48 | (parse-integer chan) 49 | (parse-integer rate :junk-allowed t) ;; "54 Mbit/s" 50 | (parse-integer signal) 51 | (split-string (string-right-trim " " security)))))) 52 | 53 | (defun nmcli-list () 54 | (destructuring-bind (fields . lines) (run/lines '(nmcli device wifi list)) 55 | (let ((field-lengths (extract-field-lengths fields))) 56 | (values (mapcar (parse-nmcli-list-line field-lengths) lines) 57 | field-lengths)))) 58 | 59 | (exporting-definitions 60 | 61 | (defun get-wireless-connections () 62 | (destructuring-bind (fields . lines) (run/lines '(nmcli connection show --active)) 63 | (let ((field-lengths (extract-field-lengths fields))) 64 | (loop :for line :in lines :collect 65 | (first (extract-fields field-lengths line)))))) ;; dropping uuid type device 66 | 67 | (defun get-wireless-passphrase (essid) 68 | (with-input-file (s (get-wireless-secrets)) 69 | (loop :with expected = (strcat "ESSID: " essid) 70 | :for line = (read-line s nil nil) :while line :do 71 | (if (equal line expected) 72 | (match (read-line s nil nil) 73 | ((ppcre "^pass(?:word|phrase): (.*)$" pass) (return pass))))))) 74 | 75 | (defun nmup (&optional connection (passphrase :auto)) 76 | (if connection 77 | (let ((passphrase 78 | (if (eq passphrase :auto) 79 | (get-wireless-passphrase connection) 80 | passphrase))) 81 | (if passphrase 82 | (with-temporary-file (:stream s :pathname passwd-file) 83 | (format s "802-11-wireless-security.psk:~a~%" passphrase) 84 | :close-stream 85 | (run/i `(nmcli connection up ,connection passwd-file ,passwd-file (>& 1 2)))) 86 | (run/i `(nmcli --ask connection up ,connection))) 87 | (success)) 88 | (nmauto))) 89 | 90 | (defun nowifi () 91 | (dolist (connection (get-wireless-connections)) 92 | (run/i `(nmcli connection down ,connection)))) 93 | 94 | (defun nmauto () 95 | (loop :with table = (make-hash-table :test 'equal) 96 | :for network :in (nmcli-list) 97 | :for ssid = (second network) 98 | :do (setf (gethash ssid table) t) 99 | :finally 100 | (with-input-file (s (get-wireless-secrets)) 101 | (loop :for line = (read-line s nil nil) :while line :do 102 | (when (string-prefix-p "ESSID: " line) 103 | (let ((ssid (subseq line #.(length "ESSID: ")))) 104 | (when (gethash ssid table) 105 | (match (read-line s nil nil) 106 | ((ppcre "^pass(?:word|phrase): (.*)$" pass) 107 | (nmup ssid pass)) 108 | (_ (nmup ssid nil))) 109 | (return-from nmauto ssid)))))))) 110 | ) 111 | 112 | (register-commands :fare-scripts/network) 113 | -------------------------------------------------------------------------------- /random.lisp: -------------------------------------------------------------------------------- 1 | (uiop:define-package :fare-scripts/random 2 | (:use :cl :uiop :fare-utils :optima :optima.ppcre :binascii :cl-scripting 3 | :inferior-shell :command-line-arguments) 4 | (:export 5 | ;; #:randomize #:random-bytes #:urandom 6 | ;; #:*diceware* #:*diceware-words* 7 | ;; #:dice #:roll-index #:roll-string #:get-diceware-words #:ensure-diceware-words 8 | ;; #:diceware-word 9 | #:diceware-phrase #:genpasswd #:genresa #:random-run #:shuffle-lines 10 | )) 11 | 12 | (in-package :fare-scripts/random) 13 | 14 | (defun randomize () 15 | (setf *random-state* (make-random-state t))) 16 | 17 | (register-image-restore-hook 'randomize) 18 | 19 | (defun shuffle-list (list &optional n) 20 | (check-type list list) 21 | (loop :with vec = (coerce list 'vector) 22 | :with len = (length vec) 23 | :repeat (if n (min n len) len) 24 | :for end :downfrom len 25 | :for i = (random end) 26 | :for val = (aref vec i) 27 | :do (setf (aref vec i) (aref vec (- end 1))) 28 | :collect val)) 29 | 30 | (defun group-by (n list) 31 | (loop :for len :downfrom (length list) :above 0 :by n 32 | :for l = list :then (nthcdr n l) 33 | :collect (subseq l 0 (min n len)))) 34 | 35 | #|(defun ensure-prng () 36 | (unless crypto:*prng* (setf crypto:*prng* (crypto:make-prng :fortuna))))|# 37 | 38 | (defun random-bytes (n) 39 | (let ((x (make-array (list n) :element-type '(unsigned-byte 8) :initial-element 0))) 40 | (with-input-file (s "/dev/urandom" :element-type '(unsigned-byte 8)) 41 | (read-sequence x s)) 42 | x)) 43 | 44 | (defun urandom (n) 45 | ;; If you don't trust your implementation's CL:RANDOM, you can use FARE-SCRIPTS/RANDOM:URANDOM. 46 | ;; Get 64 extra bits everytime, minimizing the mismatch between n and 2**m 47 | (let* ((n-bytes (ceiling (+ 64 (log n 2)) 8)) 48 | (bytes (random-bytes n-bytes)) 49 | (big-n (reduce (lambda (x y) (mod (+ (ash x 8) y) n)) bytes))) 50 | (mod big-n n))) 51 | 52 | (defparameter *diceware* 53 | `(:file ,(subpathname (user-homedir-pathname) "src/security/diceware-fr/diceware-fr-5-jets.txt") 54 | :n-dice 5)) 55 | 56 | (defvar *diceware-words* ()) 57 | 58 | (defun dice (&optional (n 1) (sides 6)) 59 | (loop :repeat n :sum (1+ (random sides)))) 60 | 61 | (defun roll-index (&optional x (n-dice (getf *diceware* :n-dice))) 62 | (etypecase x 63 | (null 64 | (random (expt 6 n-dice))) 65 | ((integer * 0) 66 | (assert (< (- (expt 6 n-dice)) x)) 67 | (- x)) 68 | (integer 69 | (roll-index (princ-to-string x))) 70 | (list 71 | (assert (= n-dice (length x))) 72 | (loop :for d :in (reverse x) 73 | :for m = 1 :then (* 6 m) 74 | :do (assert (typep d '(integer 1 6))) 75 | :sum (* m (1- d)))) 76 | (string 77 | (assert (= n-dice (length x))) 78 | (roll-index 79 | (loop :repeat n-dice :for c :across x :for d = (digit-char-p c) 80 | :do (assert (typep d '(integer 1 6))) 81 | :collect d))))) 82 | 83 | (defun roll-string (&optional x (n-dice (getf *diceware* :n-dice))) 84 | (map 'string (lambda (c) (code-char (1+ (char-code c)))) 85 | (format nil "~6,v,'0R" n-dice (roll-index x n-dice)))) 86 | 87 | (defun get-diceware-words () 88 | (destructuring-bind (&key file n-dice) *diceware* 89 | (let* ((limit (expt 6 n-dice)) 90 | (words (make-array limit))) 91 | (with-input-file (w file) 92 | (loop :for i :from 0 93 | :for l = (read-line w nil) 94 | :while l 95 | :do (match l 96 | ((ppcre (strcat "^([1-6]+) ([!-~]+)$") n x) 97 | (assert (= i (roll-index n n-dice))) 98 | (setf (aref words i) x)) 99 | (_ (error "invalid line in diceware file: ~A" l))) 100 | :finally (assert (= i limit)))) 101 | words))) 102 | 103 | (defun ensure-diceware-words () 104 | (destructuring-bind (&key file n-dice) *diceware* 105 | (declare (ignore file)) 106 | (unless (typep *diceware-words* `(vector * ,(expt 6 n-dice))) 107 | (setf *diceware-words* (get-diceware-words)))) 108 | (values)) 109 | 110 | 111 | (defun diceware-word (&optional roll) 112 | (ensure-diceware-words) 113 | (aref *diceware-words* (roll-index roll))) 114 | 115 | (exporting-definitions 116 | 117 | ;; 20 words of 5 dice is just over 258 bits. 118 | (defun diceware-phrase (&optional (n-words 20) &rest more-rolls) 119 | (let ((rolls 120 | (if (or more-rolls (not (typep n-words '(integer 1 99)))) 121 | (cons n-words more-rolls) 122 | (loop :repeat n-words :collect (roll-string))))) 123 | (join-strings (mapcar 'diceware-word rolls) :separator " "))) 124 | 125 | (defun genpasswd () 126 | (binascii:encode (random-bytes 32) :base64)) 127 | 128 | (defun genresa () 129 | (format nil "~36R" (random (expt 36 6)))) 130 | 131 | (defun shuffle-lines (&optional n) 132 | (let* ((lines (slurp-stream-lines *standard-input*)) 133 | (n (if n (parse-integer n) (length lines)))) 134 | (map () 'println (shuffle-list lines n))) 135 | (values)) 136 | 137 | (defun random-run (&rest arguments) 138 | (nest 139 | (multiple-value-bind (options args) 140 | (process-command-line-options 141 | '((("log" #\l) :type string :optional t :documentation "specify log file") 142 | (("echo" #\e) :type boolean :optional t :initial-value nil :documentation "echo command before and after") 143 | (("at-once" #\a) :type integer :optional t :initial-value 1 :documentation "number of arguments at once")) 144 | arguments)) 145 | (destructuring-bind (&key log echo at-once) options) 146 | (let* ((pos (position "--" args :test 'equal)) 147 | (prefix (subseq args 0 pos)) 148 | (args-to-randomize (subseq args (1+ pos))) 149 | (random-args (shuffle-list args-to-randomize)))) 150 | (flet ((do-it (logger) 151 | (handler-case 152 | (loop :for args :in (group-by at-once random-args) 153 | :for command = `(,@prefix ,@args) :do 154 | (funcall logger prefix args) 155 | (run/i command :show echo)) 156 | (condition () (quit 3))))) 157 | (if log 158 | (do-it (lambda (prefix args) 159 | (declare (ignore prefix)) 160 | (with-output-file (f log :if-exists :append) 161 | (format f "~A~%" (escape-shell-command args))))) 162 | (do-it (constantly nil))) 163 | (when echo 164 | (format! t "That was ~A~%" (escape-shell-command `(,@prefix ,@random-args))))) 165 | (values))) 166 | 167 | 168 | );exporting-definitions 169 | 170 | (register-commands :fare-scripts/random) 171 | -------------------------------------------------------------------------------- /languages.lisp: -------------------------------------------------------------------------------- 1 | ;; Personal scripts to deal with various programming languages 2 | 3 | (uiop:define-package :fare-scripts/languages 4 | (:use :cl :fare-utils :uiop 5 | :inferior-shell :cl-scripting :fare-scripts/commands 6 | :optima :optima.ppcre 7 | :cl-launch/dispatch) 8 | (:export 9 | #:frob #:frop #:mkba #:mkba2 10 | #:myccl #:mychez #:myclisp #:myecl #:mygcl #:myhott #:mymkcl #:myplt 11 | #:myrust #:mysbcl #:mysbcl-contrib #:upccl #:mygambit #:mygerbil)) 12 | 13 | (in-package :fare-scripts/languages) 14 | 15 | (exporting-definitions 16 | 17 | (progn 18 | (defun nns (x) 19 | (let ((s (native-namestring x))) 20 | (if (and (equal (last-char s) #\/) (not (equal s "/"))) 21 | (subseq s 0 (1- (length s))) 22 | s)))) 23 | 24 | (defun mkba () 25 | (with-current-directory ((subpathname (src-root) "fare/bastiat.org/")) 26 | (run '(make dep)) (run '(make))) 27 | (success)) 28 | 29 | (defun mkba2 () 30 | (mkba) (mkba)) 31 | 32 | (defun myclisp (&optional debug) 33 | (with-current-directory ((subpathname (common-lisp-src) "clisp/")) 34 | (run/i `(hg clean)) ;; requires "[extensions]\npurge = " in ~/.hgrc 35 | (ignore-errors 36 | (delete-directory-tree 37 | (subpathname (common-lisp-src) "clisp/build-dir/") 38 | :validate (lambda (p) 39 | (equal "build-dir" (car (last (pathname-directory p))))))) 40 | (run/i `(./configure 41 | --with-ffcall 42 | (--with-libffcall-prefix=,(common-lisp-src)clisp/tools/x86_64-unknown-linux-gnu) 43 | --with-readline --with-libreadline-prefix=/usr 44 | --with-sigsegv --with-libsigsegv-prefix=/usr 45 | --with-module=asdf 46 | --with-module=bindings/glibc 47 | --with-module=clx/new-clx 48 | --with-module=dbus 49 | --with-module=i18n 50 | --with-module=rawsock 51 | --with-module=readline 52 | --with-module=regexp 53 | --with-module=syscalls 54 | --with-module=zlib 55 | ;;--with-module=berkeley-db 56 | ;;--with-module=dirkey 57 | ;;--with-module=gdbm 58 | ;;--with-module=gtk2 59 | ;;--with-module=fastcgi 60 | ;;--with-module=libsvm 61 | ;;--with-module=matlab 62 | ;;--with-module=netica 63 | ;;--with-module=oracle 64 | ;;--with-module=netica 65 | ;;--with-module=pari 66 | ;;--with-module=pcre 67 | ;;--with-module=postgresql 68 | ;;--with-module=queens 69 | (--prefix=,(stow-root)clisp) 70 | --cbc 71 | ;; The below line is for debugging the GC. See CLISP bug 678 72 | ;; https://sourceforge.net/p/clisp/bugs/678/ 73 | ,@(when debug '(--with-debug "CC=g++")) 74 | build-dir)) 75 | ;;(run/i `(make "-C" build-dir distclean) 76 | (run/i `(make "-C" build-dir)) 77 | (run/i `(make "-C" build-dir check)) 78 | (run/i `(make "-C" build-dir install (prefix=,(stow-root)clisp)))) 79 | (success)) 80 | 81 | (defun myecl () 82 | (with-current-directory ((subpathname (common-lisp-src) "ecl/")) 83 | (run/i `(git clean -xfd)) 84 | (run/i `(./configure (--prefix=,(stow-root)ecl))) 85 | (run/i `(make -l6)) 86 | (run/i `(make install)))) 87 | 88 | (defun mymkcl () 89 | (with-current-directory ((subpathname (common-lisp-src) "mkcl/")) 90 | (run/i `(git clean -xfd)) 91 | (run/i `(./configure (--prefix=,(stow-root)mkcl))) 92 | (run/i `(make -l6)) 93 | (run/i `(make install)))) 94 | 95 | (defun mygcl () 96 | ;; git clone git://git.sv.gnu.org/gcl.git 97 | (with-current-directory ((subpathname (common-lisp-src) "gcl/gcl/")) 98 | (run/i `(git clean -xfd)) 99 | (run/i `(./configure --enable-ansi (--prefix=,(stow-root)gcl))) ;; --disable-dynsysgmp --enable-static ??? 100 | (run/i `(make -l6 install (prefix=,(stow-root)gcl))) 101 | (delete-file (subpathname (stow-root) "gcl/share/info/dir")) 102 | (success))) 103 | 104 | (defun mysbcl (&optional (install-root (subpathname (stow-root) "sbcl/"))) 105 | (DBG "Compiling a custom SBCL") 106 | (with-current-directory ((subpathname (common-lisp-src) "sbcl/")) 107 | (let ((out (subpathname (temporary-directory) "mysbcl.out"))) 108 | (run/i `(pipe ("sh" "./make.sh" ("--prefix=",(nns install-root)) 109 | ,@(when (probe-file "/usr/bin/sbcl") 110 | '("--xc-host=/usr/bin/sbcl --disable-debugger --no-userinit --no-sysinit")) 111 | "--with-sb-threads" "--with-sb-linkable-runtime" "--with-sb-dynamic-core" 112 | "--fancy" (>& 2 1)) 113 | (tee ,out)) :show t) 114 | (ignore-errors 115 | (delete-directory-tree (subpathname install-root "lib/sbcl/") 116 | :validate #'(lambda (p) (subpathp p install-root)))) 117 | (run/i `(pipe (sh "./install.sh" (--prefix=,(nns install-root))) 118 | (tee -a ,out)) :show t))) 119 | (success)) 120 | 121 | (defun mysbcl-contrib (&optional (install-root (subpathname (stow-root) "sbcl/"))) 122 | (let ((out (subpathname (temporary-directory) "mysbcl-contrib.out"))) 123 | (with-current-directory ((subpathname (common-lisp-src) "sbcl/obj/asdf-upstream/")) 124 | (run/i `(pipe (git pull (subpathname (cl-root) "asdf/")) (tee ,out)))) 125 | (with-current-directory ((subpathname (common-lisp-src) "sbcl/contrib/asdf/")) 126 | (run/i `(pipe (make up "SBCL=../../run-sbcl.sh") (tee -a ,out)))) 127 | (with-current-directory ((subpathname (common-lisp-src) "sbcl/")) 128 | (run/i `(pipe (sh "./make-target-contrib.sh" (--prefix=,install-root) --with-sb-core-compression) 129 | (tee -a out))) 130 | (run/i `(pipe (sh "./install.sh" (--prefix=,install-root)) (tee -a out))))) 131 | (success)) 132 | 133 | (defun myrust () 134 | (with-current-directory ((subpathname (src-root) "proglang/rust/")) 135 | (let ((install-root (subpathname (stow-root) "rust/")) 136 | (out (subpathname (temporary-directory) "myrust.out"))) 137 | (run/i `(pipe (sh "./configure" (--prefix=,install-root)) (tee ,out))) 138 | ;;(delete-directory-tree (subpathname install-root "lib/rust/") :validate #'(lambda (p) (subpathp p install-root))) 139 | (run/i `(pipe (make install) (tee -a ,out))))) 140 | (success)) 141 | 142 | (defun myplt () 143 | (with-current-directory ((subpathname (src-root) "racket/plt/")) 144 | (run/i `(git clean -xfd)) 145 | (run/i `(pipe (make -l6 unix-style ("PREFIX"=,(stow-root)plt) (>& 2 1)) 146 | (tee /tmp/plt.out)))) 147 | (success)) 148 | 149 | (defun mychez () 150 | (with-current-directory ((subpathname (src-root) "scheme/ChezScheme/")) 151 | (run/i `(git clean -xfd)) 152 | (run/i `("./configure" 153 | "--threads" 154 | ("--installprefix=",(stow-root)"ChezScheme") 155 | ("--installman=",(stow-root)"share/man"))) 156 | (run/i `(pipe ("make" "-l6" "-C" "ta6le" "install" (>& 2 1)) 157 | (tee /tmp/chez.out)))) 158 | (success)) 159 | 160 | (defun myhott () 161 | (with-current-directory ((subpathname (src-root) "coq/HoTT/")) 162 | (run/i `(etc/install_coq.sh)) 163 | (run/i `(./autogen.sh)) 164 | (run/i `(./configure ("COQBIN=",(getcwd)"/coq-HoTT/bin"))) 165 | (run/i `(make)))) 166 | 167 | (defun frob () 168 | (with-current-directory ((subpathname (src-root) "fare/ngnghm/")) 169 | (run/i '(raco frog -b))) 170 | (success)) 171 | 172 | (defun frop () 173 | (with-current-directory ((subpathname (src-root) "fare/ngnghm/")) 174 | (run/i '(raco frog -bp))) 175 | (success)) 176 | 177 | (defun myccl () 178 | (with-current-directory ((subpathname (common-lisp-src) "ccl/")) 179 | (loop :repeat 2 :do 180 | (run/i '("./lx86cl64" "--no-init" "--eval" "(progn (ccl:rebuild-ccl :full t) (ccl:quit 0))")))) 181 | (success)) 182 | 183 | (defun upccl (version) 184 | (with-current-directory ((subpathname (common-lisp-src) "ccl/")) 185 | (loop 186 | :for external :in (cons " ." (run/lines '(svn propget "svn:externals" "."))) 187 | :for pos = (position #\space external) 188 | :for dir = (when pos (subseq external (1+ pos))) 189 | :when dir 190 | :do (run/i `(svn up (-r ,version) --ignore-externals ,dir) :show t)) 191 | (success))) 192 | 193 | (defun mygambit () 194 | (with-current-directory ((subpathname (src-root) "scheme/gambit/")) 195 | ;; This assumes that https://github.com/gambit/gambit/pull/279 is merged in. 196 | (run/i `(git clean -xfd)) 197 | (run/i `("./configure" 198 | ;; https://github.com/vyzo/gerbil/wiki/Getting-Started-with-Gerbil-development 199 | ("--prefix=" ,(stow-root) "gambit") 200 | "--enable-targets=C,js" ;; NO: java,php,python,ruby arm,riscv-32,riscv-64,x86,x86-64 201 | "--enable-single-host" 202 | "--enable-c-opt=-O1" ;; -O1 compiles faster, even though -Os has overall better performance 203 | "--enable-c-opt-rts=-O2" 204 | "--enable-gcc-opts" 205 | "--enable-shared" 206 | "--enable-absolute-shared-libs" 207 | "--enable-poll" 208 | "--enable-trust-c-tco" 209 | "--enable-dynamic-clib" 210 | ;; "--enable-default-runtime-options=f8,-8,tE8" ;; Default to UTF-8 for source and all I/O 211 | ;; "--enable-guide" 212 | ;; "--enable-profile" 213 | ;; "--enable-coverage" 214 | ;; "--enable-inline-jumps" 215 | ;; "--enable-char-size=1" ; default is 4 216 | ;; "--enable-multiple-versions" 217 | ;; "--enable-multiple-vms" 218 | ;; "--enable-smp" 219 | ;; "--enable-thread-system" 220 | ;; "--enable-max-processors=4" 221 | ;; "--enable-track-scheme" 222 | ;; "--enable-high-res-timing" 223 | ;; "--enable-thread-system=posix" 224 | ;; "--enable-dynamic-tls" 225 | ;; "--enable-openssl" 226 | )) 227 | (run/i `("make" "-j4" "current-gsc-boot")) 228 | (run/i `("make" "-j4" "from-scratch")) 229 | (run/i `("make" "check")) 230 | ;;(run/i `("make" "-j4" "modules")) 231 | (run/i `("make" "install"))) 232 | (success)) 233 | 234 | (defun mygerbil () 235 | (with-current-directory ((subpathname (src-root) "fare/gerbil")) 236 | (setf (getenv "GERBIL_BUILD_CORES") 237 | (run-program '("grep" "-c" "^processor.:" "/proc/cpuinfo") :output :line)) 238 | (run/i `("./configure" ("--prefix=" ,(stow-root) "gerbil/gerbil") 239 | ;;"--with-gambit=master" ;; master, v4.9.5 or other branch or tag 240 | "--enable-shared" 241 | "--disable-deprecated" 242 | "--enable-zlib" 243 | "--enable-sqlite")) 244 | (run/i `("make" "clean")) 245 | (run/i `("./build.sh")) 246 | (run/i `("./install.sh")))) 247 | 248 | );exporting-definitions 249 | 250 | (register-commands :fare-scripts/languages) 251 | -------------------------------------------------------------------------------- /viet-practice.lisp: -------------------------------------------------------------------------------- 1 | ;; http://paste.lisp.org/+742R 2 | 3 | (uiop:define-package :fare-scripts/viet-practice 4 | (:use :common-lisp :uiop :fare-utils :optima :optima.ppcre :cl-unicode) 5 | (:export 6 | #:practice)) 7 | (in-package :fare-scripts/viet-practice) 8 | 9 | (defun practice (list) 10 | (loop do (loop with copy = (copy-list list) 11 | until (null copy) 12 | do (let ((entry (elt copy (random (length copy))))) 13 | (setf copy (remove entry copy)) 14 | (format t "~a~%" (car entry)) 15 | (if (equal (read-line) (cdr entry)) 16 | (format t "correct~%") 17 | (progn (format t "try again~%") 18 | (if (equal (read-line) (cdr entry)) 19 | (format t "correct~%") 20 | (format t "no, the answer is: ~a~%" (cdr entry))))))))) 21 | 22 | (defparameter *weather* 23 | '(("weather" . "thời tiết") 24 | ("warm or hot, as in the weather is hot~%or the coffee is hot" 25 | . "nóng") 26 | ("warm(ish), as in the weather is pleasantly warm" 27 | . "ấm") 28 | ("(very) cold" 29 | . "lạnh") 30 | ("cool, as in the weather is pleasantly cool" 31 | . "mát") 32 | ("humid, damp, of weather for instance" 33 | . "ẩm") 34 | ("dry, of weather, but also dried fruit" 35 | . "khô") 36 | ("sunny" 37 | . "nắng") 38 | ("rainy" 39 | . "mưa") 40 | ("windy" 41 | . "gió"))) 42 | 43 | (defparameter *colors* 44 | '(("red" . "đỏ") 45 | ("black" . "đen") 46 | ("green" . "xanh lá cây") 47 | ("blue (as the sky)" . "xanh da trời") 48 | ("blue (as the sea)" . "xanh nước biển") 49 | ("blue (light blue or green as banana leaves)" . "xanh lơ") 50 | ("white" . "trắng") 51 | ("yellow" . "vàng") 52 | ("orange" . "da cam") 53 | ("purple" . "tím") 54 | ("brown" . "nâu") 55 | ("gray (mixture between black and white)" . "ghi") 56 | ("gray (usual)" . "xám") 57 | ("indigo blue" . "lam") 58 | ("pink" . "hồng"))) 59 | 60 | 61 | (defparameter *sizes* 62 | '(("tall (for people and things), large (for things))" . "lớn") 63 | ("small (for people or things)" . "bé") 64 | ("short (for people or things)" . "thấp") 65 | ("short (for things (like pants))" . "ngắn") 66 | ("big (for things) fat (for people)" . "to") 67 | ("small (for people or things), thin (for people)" . "nhỏ") 68 | ("wide (for things)" . "rộng") 69 | ("narrow (for things)" . "hẹp") 70 | ("fat (of a person or an animal)" . "béo") 71 | ("thin (of a person or an animal)" . "gầy") 72 | ("thick" . "dày") 73 | ("long" . "dài") 74 | )) 75 | 76 | (defparameter *tastes* 77 | '(("sour" . "chua") 78 | ("sweet" . "ngọt") 79 | ("hot, peppery" . "cay") 80 | ("bitter" . "đắng") 81 | ("salty" . "mặn") 82 | ("tasty, good" . "ngon"))) 83 | 84 | (defparameter *adjectives* 85 | (append *weather* *colors* *sizes* *tastes*)) 86 | 87 | (defparameter *relative-positions* 88 | '(("next to" . "ở cạnh") 89 | ("between" . "ở giữa") 90 | ("opposite" . "đối diện với") 91 | ("at the street corner" . "ở góc phố") 92 | ("in front of" . "ở phía trước") 93 | ("behind" . "ở phía sau"))) 94 | 95 | (defparameter *directions* 96 | '(("go straight ahead" . "đi thẳng") 97 | ("turn right" . "rẽ phải") 98 | ("turn left" . "rẽ trái") 99 | ("four-way street" . "ngã tư") 100 | ("three-way street" . "ngã ba") 101 | ("go through, traverse" . "đi qua") 102 | ("to the right of" . "bên phải") 103 | ("to the left of" . "bên trái") 104 | ("return, turn back, make a u-turn" . "quay lại") 105 | ("near" . "ở gần") 106 | ("to be lost" . "bị lạc") 107 | ("continue" . "tiếp tục") 108 | ("go up, step up" . "lên") 109 | ("go down, step down" . "xuống") 110 | )) 111 | 112 | (defparameter *places* 113 | '(("bank" . "ngân hàng") 114 | ("restaurant" . "nhà hàng") 115 | ("hotel" . "khách sạn") 116 | ("public park, public garden" . "công viên") 117 | ("supermarket" . "siêu thị") 118 | ("market" . "chợ") 119 | ("church, cathedral" . "nhà thờ") 120 | ("club" . "câu lạc bộ") 121 | ("foreign language center" . "trung tâm ngoại ngữ") 122 | ("university" . "đại học") 123 | ("elementary school (6-11)" . "trường tiểu học") 124 | ("middle school (12-15)" . "trường trung học cơ sở") 125 | ("high school" . "trung học") 126 | ("filling station" . "trạm xăng") 127 | ("movie theater" . "rạp chiếu phim") 128 | ("museum, maosoleum" . "bảo tàng") 129 | ("bus stop" . "bến xe bus") 130 | ("tram stop" . "bến tàu điện") 131 | ("library" . "thư viện") 132 | ("theater" . "nhà hát") 133 | ("hospital" . "bệnh viện") 134 | ("book store" . "hiệu sách") 135 | ("railway station" . "ga") 136 | ("airport" . "sân bay"))) 137 | 138 | (defparameter *part-of-day* 139 | '(("morning" . "buổi sáng") 140 | ("noon" . "buổi trưa") 141 | ("afternoon" . "buổi chiều") 142 | ("evening, night" . "buổi tối"))) 143 | 144 | (defparameter *time-date* 145 | '(("year" . "năm") 146 | ("month" . "tháng") 147 | ("week" . "tuần") 148 | ("day" . "ngày") 149 | ("hour" . "giờ") 150 | ("minute" . "phút") 151 | ("second" . "giây") 152 | ("tomorrow" . "ngày mai") 153 | ("yesterday" . "hôm qua") 154 | ("the day after tomorrow" . "ngày kia") 155 | ("the day before yesterday" . "hôm kia") 156 | ("the day after the day after tomorrow" . "ngày kìa") 157 | ("the day before the day before yesterday" . "hôm kìa") 158 | ("last, previous (week, month)" . "trước") 159 | ("next (week, month) (1)" . "tới") 160 | ("next (week, month) (2)" . "sau") 161 | ("the preceding day" . "hôm trưóc") 162 | ("the following day" . "hôm tới") 163 | ("today" . "hôm nay") 164 | ("minus (x o'clock minus y minutes" . "kém") 165 | ("this week" . "tuần náy") 166 | ("this month" . "tháng này") 167 | ("this year" . "năm nay") 168 | ("never (1)" . "không bao giờ") 169 | ("never (2)" . "chưa bao giờ") 170 | )) 171 | 172 | (defparameter *vehicles* 173 | '(("car, automobile" . "xe ô tô") 174 | ("bus" . "xe buýt") 175 | ("train xe ..." . "xe lửa") 176 | ("train tàu ..." . "tàu hoả") 177 | ("TGV" . "tàu cau tốc") 178 | ("tram" . "tàu điện") 179 | ("subway, metro" . "tàu diện ngầm") 180 | ("bicycle" . "xe đạp") 181 | ("cycle tri-shaw" . "xích lô") 182 | ("motorbike" . "xe máy") 183 | ("scooter taxi" . "xe ôm") 184 | ("airplane" . "máy bay") 185 | ("truck, lorry" . "xe tải") 186 | ("(small) boat" . "thuyền") 187 | ("steam boat, steamer, (large) boat" . "tàu thủy"))) 188 | 189 | (defparameter *countries* 190 | '(("Algeria" . "An giê ri") 191 | ("Morocco" . "Ma Rốc") 192 | ("Sweden" . "Thụy Điển" ) 193 | ("Denmark" . "Đan Mạch") 194 | ("Finland" . "Phần Lan") 195 | ("Norway" . "Na Uy") 196 | ("Iceland" . "Ai-xơ-len") 197 | ("England, United Kingdom" . "Anh") 198 | ("Belgium" . "Bỉ") 199 | ("Netherlands" . "Hà Lan") 200 | ;; ("Luxemburg" . "") 201 | ("France" . "Pháp") 202 | ("Germany" . "Đức") 203 | ("Spain" . "Tây Ban Nha") 204 | ("Portugal" . "Bồ Đào Nha") 205 | ("Italy" . "Ý") 206 | ("Greece" . "Hy Lạp") 207 | ("Austria" . "Áo") 208 | ("Switzerland" . "Thụy Sĩ") 209 | ("Poland" . "Ba Lan") 210 | ("Turkey" . "Thổ Nhĩ Kỳ") 211 | ;; ("Tcheque Republic" . "") 212 | ;; ("Slovakia" . "") 213 | ;; ("Hungary" . "") 214 | ("USA" . "Mỹ") 215 | ("Canada" . "Canada") 216 | ;; ("Brezil" . "") 217 | ("Vietnam" . "Việt Nam") 218 | ("China" . "Trung Quốc") 219 | ("Cambodia" . "Campuchia") 220 | ("Thailand" . "Thái Lan") 221 | ("Laos" . "Lào") 222 | ("Australia" . "Úc") 223 | ("Japan" . "Nhật Bản") 224 | ("India" . "Ấn Đô") 225 | ;; ("New Zealand" . "") 226 | )) 227 | 228 | (defparameter *clothes* 229 | '(("throw on a scarf" . "quàng khăn") 230 | ("wear eye glasses" . "đeo kính") 231 | ("wear a bracelet, necklace" . "đeo vòng") 232 | ("wear a tie" . "thắt cà vạt") 233 | ("wear a skirt" . "mặc váy") 234 | ("wear trousers" . "mặc quần") 235 | ("wear a blouse, shirt, jacket" . "mặc áo") 236 | ("wear a hat" . "đội nón") 237 | ("wear a cap" . "đội mũ") 238 | ("wear shoes, boots" . "đi giày") 239 | ("wear sandals slippers" . "đi dép") 240 | ("wear tongs" . "đi tông") 241 | ("be barefoot" . "đi chân đất") 242 | )) 243 | 244 | (defparameter *set-phrases* 245 | '(("not only ... but ..." . "không chỉ ... mà ...") 246 | ("because ... it is the case that ..." . "do ... nên ...") 247 | ("if ... then ..." . "nếu ... thì ...") 248 | ("help me by ..." . "để tôi ... giúp em") 249 | ("would you like me to help you ..." . "em ... giúp tôi được không") 250 | ("from ... to ..." . "từ ... đến ...") 251 | ("do something together with someone else" . "cùng với") 252 | ("do something together" . "cùng nhau") 253 | ;; ("" . "") 254 | )) 255 | 256 | (defparameter *animals* 257 | '(("pig, pork" . "heo") 258 | ("cow, beef" . "bò") 259 | ("chicken" . "gà") 260 | ("fish" . "cá") 261 | ("salmon" . "cá hồi") 262 | ("shrimp" . "tôm") 263 | ("prawn" . "tôm he") 264 | ("lobster" . "tôm hùm") 265 | ("crab" . "cua") 266 | ("duck" . "vịt") 267 | ("wild goose" . "ngan") 268 | )) 269 | 270 | (defparameter *food* 271 | '(("vegetables, greens" . "rau") 272 | ("meat" . "thịt") 273 | ("soup" . "canh") 274 | ("salad" . "xà lách") 275 | ("cooked rice, boiled rice" . "cơm") 276 | ("rice porridge" . "cháo") 277 | ("bindweed, spinach" . "rau muống") 278 | ("sweet potato, taro" . "khoai") 279 | ("potato" . "khoai tây") 280 | ("imperial spring rolls" . "nem đế") 281 | ("Saigon spring roll" . "chả giò") 282 | ("raw Saigon spring roll" . "chả giò sống") 283 | ("raw spring roll" . "gỏi sống") 284 | ("liver" . "gan") 285 | ("side, ribs, spare ribs" . "xương sườn") 286 | ;; ("hot pot" . "") 287 | ;; ("morning glory" . "") 288 | ("bread" . "bánh mì") 289 | ("cheese" . "phó mát") 290 | ("butter" . "bơ") 291 | ("milk" . "sữa") 292 | ("sugar" . "đường") 293 | ("fruit" . "quả") 294 | ("dragon fruit" . "thanh long") 295 | ("mango (fruit)" . "xoài") 296 | ("lychee (fruit)" . "vải") 297 | ("longan (fruit)" . "long nhãn") 298 | ("orange (fruit)" . "cam") 299 | ("lemon (fruit)" . "chanh") 300 | ("apple (fruit)" . "bôm") 301 | ("banana (fruit)" . "chuối") 302 | ("coconut" . "dừa") 303 | ("orange juice" . "nước cam") 304 | ("coconut milk" . "nước dừa") 305 | ;; ("apple juice" . "") 306 | ("lemonade" . "nưóc chanh") 307 | ("coffee" . "cà phê") 308 | ("tea" . "trà") 309 | ("wine" . "vang") 310 | ("beer" . "bia") 311 | ("dish, course" . "món") 312 | ("strir fry (with onions and vegetables)" . "xào") 313 | ("special dish" . "đặc sản") 314 | ("menu" . "thực đơn") 315 | ("check please" . "") 316 | ("ice" . "đá") 317 | ("hungry" . "đói") 318 | ("thirsty" . "khát") 319 | )) 320 | 321 | (defparameter *misc* 322 | '(("if" . "nếu") 323 | ("then" . "thì") 324 | ("only" . "chỉ") 325 | ("but" . "nhưng") 326 | ("like" . "như") 327 | ("usually, often, always, several" . "những") 328 | ("why" . "tại sao") 329 | ("because" . "bởi vì") 330 | ("can, be able to, possible" . "có thể") 331 | ("all, the whole of" . "tất cả") 332 | ("choose" . "chọn") 333 | ("friendly" . "thân thiện") 334 | ("never (1)" . "không bao giờ") 335 | ("never (2)" . "chưa bao giờ") 336 | ("how?" . "thế nào") 337 | ("the end of something" . "cuối") 338 | ("ever since, since then" . "từ đấy") 339 | ("also, too, as well" . "cũng") 340 | ("so that, in order to" . "để") 341 | ("to remember" . "nhớ") 342 | ("to forget" . "quên") 343 | ("thing, object, article" . "đồ") 344 | )) 345 | 346 | (defparameter *seasons* 347 | '(("winter" . "mùa đong") 348 | ("summer" . "mùa hè") 349 | ("spring" . "mùa xuân") 350 | ("fall, autumn" . "mùa thu") 351 | ("rainy season" . "mùa mưa") 352 | ("dry season" . "mùa khô") 353 | ("sunny season" . "mùa nắng") 354 | )) 355 | 356 | (defparameter *rooms* 357 | '(("dining room" . "phòng ăn") 358 | ("public dining room" . "phòng ăn công cộng") 359 | ("ticket office " . "phòng bán vé") 360 | ("kitchen" . "phòng bếp") 361 | ("press room" . "phòng báo chí") 362 | ("bedroom" . "phòng ngủ") 363 | ("guest room" . "phòng khác") 364 | ("bathroom" . "phòng tắm") 365 | ("library" . "phòng đọc sách") 366 | ("workshop" . "xưởng") 367 | ("engine room" . "phòng máy") 368 | ("photo studio" . "phòng ảnh") 369 | )) 370 | 371 | (defparameter *math* 372 | '(("percent" . "phần trăm") 373 | ("plus, add" . "cộng") 374 | ("add, join, augment, increase" . "thêm") 375 | ("plus sign" . "dấu cộng") 376 | ;; ("subtract" . "") 377 | )) 378 | 379 | (defparameter *body-parts* 380 | '(("foot, leg" . "chân") 381 | )) 382 | 383 | (defparameter *tourism* 384 | '(("photograph" . "ảnh") 385 | ("take a picture" . "chup ảnh") 386 | ("air conditioning" . "máy lạnh") 387 | ("refrigerator" . "tủ lạnh") 388 | ("to pay" . "trả") 389 | ("telephone" . "điện thoại") 390 | ("luxurious" . "sang trọng") 391 | ("bank card; credit card" . "thẻ ngân hàng") 392 | ("cash" . "tiền mặt") 393 | ("withdraw cash" . "rút tiền mặt") 394 | ("(national) police" . "cảnh sát") 395 | ("(municipal) police" . "công an") 396 | ("to visit" . "thăm") 397 | ("cheap" . "rẻ") 398 | ("expensive (1)" . "mắc") 399 | ("expensive (2)" . "đắt đỏ") 400 | )) 401 | -------------------------------------------------------------------------------- /edgar.lisp: -------------------------------------------------------------------------------- 1 | #| 2 | Script to download contents from EDGAR 3 | (e.g. as base data on which to train various machine learning tools) 4 | 5 | (asdf:make :load-quicklisp) (asdf:make :fare-scripts/edgar) (in-package :fare-scripts/edgar) 6 | 7 | In ~/tmp/edgar/ I used: 8 | lftp ftp.sec.gov 9 | cd edgar 10 | mirror daily-index 11 | 12 | 13 | FARE-SCRIPTS/EDGAR> (time (defparameter *d* (multiple-value-list (collect-index-data (get-all-compressed-form-indexes))))) 14 | Evaluation took: 15 | 239.139 seconds of real time 16 | 221.767000 seconds of total run time (194.312000 user, 27.455000 system) 17 | [ Run times consist of 12.609 seconds GC time, and 209.158 seconds non-GC time. ] 18 | 92.74% CPU 19 | 32 lambdas converted 20 | 430,443,370,423 processor cycles 21 | 50,257,987,904 bytes consed 22 | 23 | *D* 24 | FARE-SCRIPTS/EDGAR> (car *d*) 25 | 16021766 26 | FARE-SCRIPTS/EDGAR> (mapcar 'length (cdr *d*)) 27 | (696 612150 563489) ;; distinct type forms, distinct company names, distinct CIKs 28 | |# 29 | 30 | (uiop:define-package #:fare-scripts/edgar 31 | (:documentation "downloading EDGAR from ftp.sec.org") 32 | (:mix :common-lisp :uiop :fare-utils :inferior-shell 33 | :alexandria :split-sequence 34 | :org.mapcar.ftp.client 35 | ;;:bordeaux-threads 36 | ;;:drakma 37 | ;;:cl-date-time-parser :local-time 38 | :optima :optima.ppcre)) 39 | 40 | (defpackage #:fare-scripts/edgar/form-types (:use)) 41 | (defpackage #:fare-scripts/edgar/company-names (:use)) 42 | 43 | (cl:in-package #:fare-scripts/edgar) 44 | 45 | (declaim (optimize (speed 1) (safety 2) (debug 3))) 46 | 47 | (defparameter *edgar-server* "ftp.sec.gov") 48 | (defparameter *edgar-base* (subpathname (user-homedir-pathname) "tmp/")) 49 | 50 | (defmacro with-edgar-ftp ((conn) &body body) 51 | `(with-ftp-connection (,conn :hostname *edgar-server* 52 | :username "anonymous" :password "fare-scripts@" 53 | :passive-ftp-p t) ,@body)) 54 | 55 | (defun get-file (path) 56 | (check-type path string) 57 | (let ((local (ensure-pathname 58 | path :namestring :unix 59 | :want-relative t :want-file t :ensure-absolute t :defaults *edgar-base*))) 60 | (ensure-directories-exist local) 61 | (when (file-exists-p local) 62 | (rename-file-overwriting-target local (add-pathname-suffix local "-OLD"))) 63 | (with-edgar-ftp (c) 64 | (retrieve-file c path local)))) 65 | 66 | (defun compact-date->universal-time (date) 67 | (multiple-value-bind (year md) (floor date 10000) 68 | (multiple-value-bind (month date) (floor md 100) 69 | (assert (<= 1900 year 9999)) 70 | (assert (<= 1 month 12)) 71 | (assert (<= 1 date 31)) 72 | (encode-universal-time 0 0 0 date month year 0)))) 73 | 74 | (defun universal-time->compact-date (universal-time) 75 | (multiple-value-bind (second minute hour date month year) 76 | (decode-universal-time universal-time 0) 77 | (assert (zerop second)) 78 | (assert (zerop minute)) 79 | (assert (zerop hour)) 80 | (assert (<= 1900 year 9999)) 81 | (+ (* 10000 year) (* 100 month) date))) 82 | 83 | (defun normalize-form-type (form-type) 84 | (symbol-name (intern form-type (load-time-value (find-package* '#:fare-scripts/edgar/form-types))))) 85 | 86 | (defun normalize-company-name (name) 87 | (symbol-name (intern name (load-time-value (find-package* '#:fare-scripts/edgar/company-names))))) 88 | 89 | (defun canonicalize-index-fields (index-fields) 90 | (destructuring-bind (&key form-type company-name cik date path) index-fields 91 | (append form-type company-name cik date path))) 92 | 93 | (defun parse-index-line (canonical-index-fields line) 94 | (flet ((get-field () 95 | (string-trim " " (subseq line (pop canonical-index-fields) (pop canonical-index-fields))))) 96 | (let* ((form-type (get-field)) 97 | (company-name (get-field)) 98 | (cik0 (get-field)) 99 | (cik (parse-integer cik0)) 100 | (date0 (get-field)) 101 | (date (unless (emptyp date0) (parse-integer date0))) 102 | (path (get-field))) 103 | (assert (<= (length form-type) 11)) 104 | (assert (<= (length company-name) 61)) 105 | (assert (<= 0 cik 9999999999)) 106 | (when date ;; NB: big Y10K bug if USG still exists by then. 107 | (assert (<= 19800101 date 99991231))) 108 | (ematch path 109 | ((ppcre "^(?:edgar/)?data[0-9]*/([0-9]{1,10}|[.]deleted)/([0-9]{10})-([0-9]{2})-([0-9]{6})[.]txt$" 110 | cik1 id1 year id2) 111 | (let ((yr (parse-integer year)) 112 | (i1 (parse-integer id1)) 113 | (i2 (parse-integer id2)) 114 | (pcik (unless (equal cik1 ".deleted") (parse-integer cik1)))) 115 | #| 116 | ;; This happens more often than not before 2011: 117 | (unless (equal pcik cik) 118 | ;; Sometimes, we have (equal pcik i1). Sometimes, not even. 119 | (warn "path for CIK doesn't match: ~A" line)) 120 | |# 121 | #| 122 | ;; Most of the time, year is either year of filing (same as date of index file) 123 | ;; or year of document (for back-filing). 124 | ;; But sometimes not. 125 | (assert (or (= (mod (- file-year 80) 100) (mod (- yr 80) 100)) 126 | (= yr (mod (floor date 10000) 100)))) 127 | |# 128 | (list form-type company-name cik date pcik i1 yr i2))))))) 129 | 130 | (defun parse-index-stream (input) 131 | (nest 132 | (let ((line-buffer nil))) 133 | (labels ((peek-line () (or line-buffer (setf line-buffer (read-line input nil)))) 134 | (get-line () (if line-buffer 135 | (prog1 line-buffer (setf line-buffer nil)) 136 | (read-line input nil))))) 137 | (let* ((headers 138 | (loop :for line = (string-trim " " (peek-line)) 139 | :until (emptyp line) 140 | :do (get-line) :collect 141 | (match line 142 | ((ppcre "^([^:]+): +(.*)$" key value) (cons key value)) 143 | (_ (error "Bad header: ~A" line))))) 144 | ;;(last-data-received 145 | ;; (parse-date-time (cdr (assoc "Last Data Received" headers :test 'equal)))) 146 | ) 147 | ;; Skip empty lines 148 | (loop :for line = (string-trim " " (peek-line)) 149 | :while (emptyp line) :do (get-line))) 150 | (let* ((index-fields 151 | ;; Skip more headers 152 | ;; TODO: these are only valid for the form*.idx indexes. Support the master*.idx too. 153 | (canonicalize-index-fields 154 | (let ((line (string-trim " " (get-line)))) 155 | (cond 156 | ((equal line "Form Type Company Name CIK") 157 | (assert (equal (get-line) " Date Filed File Name")) 158 | '(:form-type (0 12) 159 | :company-name (12 74) 160 | :cik (74 86) 161 | :date (86 98) 162 | :path (98 nil))) 163 | ((equal line "Form Type Company Name CIK Date Filed File Name") 164 | '(:form-type (0 12) 165 | :company-name (12 74) 166 | :cik (74 86) 167 | :date (86 98) 168 | :path (98 nil))) 169 | (t 170 | (error "Unrecognized index file type: ~A" line)))))) 171 | (line (get-line))) 172 | (assert (every (lambda (c) (eql c #\-)) line)) 173 | (assert (<= 120 (length line)))) 174 | 175 | #|(multiple-value-bind (ignore-seconds ignore-minutes ignore-hours 176 | file-date file-month file-year 177 | ignore-weekday ignore-dst ignore-tz) 178 | (decode-universal-time last-data-received 0) 179 | (declare (ignore ignore-seconds ignore-minutes ignore-hours 180 | file-date file-month file-year 181 | ignore-weekday ignore-dst ignore-tz)))|# 182 | (let ((entries 183 | (loop :for line = (get-line) :while line :collect (parse-index-line index-fields line))))) 184 | (list headers entries))) 185 | 186 | (defun parse-index-file (path &key (compressed (equal (pathname-type path) "gz"))) 187 | (let ((path (ensure-pathname 188 | path :namestring :unix :want-file t 189 | :ensure-absolute t :defaults *edgar-base*))) 190 | (if compressed 191 | (run-program '("gzip" "-d") 192 | :input path :output #'parse-index-stream :error-output nil 193 | :external-format :latin1) 194 | (with-input-file (input path :external-format :latin1) 195 | (parse-index-stream input))))) 196 | 197 | (defun collect-index-data (index-files) 198 | (let ((count 0) 199 | (form-types (make-hash-table :test 'equal)) 200 | (company-names (make-hash-table :test 'equal)) 201 | (ciks (make-hash-table :test 'equal))) 202 | (loop :for file :in index-files 203 | :for (() entries) = (parse-index-file file) :do 204 | (loop :for (form-type company-name cik . ()) :in entries :do 205 | (incf count) 206 | (incf (gethash form-type form-types 0)) 207 | (incf (gethash company-name company-names 0)) 208 | (incf (gethash cik ciks 0)))) 209 | (values 210 | count 211 | (sort (hash-table-alist form-types) #'< :key #'cdr) 212 | (sort (hash-table-alist company-names) #'< :key #'cdr) 213 | (sort (hash-table-alist ciks) #'< :key #'cdr)))) 214 | 215 | (defvar *all-entries* nil) 216 | (defvar *by-form-type* nil) 217 | (defvar *by-company-name* nil) 218 | (defvar *by-cik* nil) 219 | 220 | (defun read-indexes (index-files &key (filter t)) 221 | (setf *all-entries* '() 222 | *by-form-type* (make-hash-table :test 'equal) 223 | *by-company-name* (make-hash-table :test 'equal) 224 | *by-cik* (make-hash-table :test 'equal)) 225 | (loop :for file :in index-files :for i :from 1 226 | :for (() entries) = (parse-index-file file) :do 227 | (format t "~&Reading index file #~D ~A~%" i file) 228 | (when (= 1 (mod i 100)) 229 | (sb-ext:gc :full t) 230 | (room)) 231 | (loop :for entry :in entries 232 | :for (form-type company-name cik . ()) = entry 233 | :when (call-function filter entry) :do 234 | (push entry *all-entries*) 235 | (push entry (gethash form-type *by-form-type* nil)) 236 | (push entry (gethash company-name *by-company-name* nil)) 237 | (push entry (gethash cik *by-cik* nil))))) 238 | 239 | (defun get-all-compressed-form-indexes () 240 | (directory (subpathname *edgar-base* #p"edgar/daily-index/????/QTR?/form*.idx.gz"))) 241 | 242 | (defun initialize-indexes (&key (filter t)) 243 | (multiple-value-setq (*all-entries* *by-form-type* *by-company-name* *by-cik*) 244 | (read-indexes (get-all-compressed-form-indexes) :filter filter)) 245 | (values)) 246 | 247 | (defun summarize-index (table &key (comparator #'string<) (summarizer #'length)) 248 | (sort (loop :for key :being :the :hash-keys :of table :using (:hash-value value) 249 | :collect (funcall summarizer value)) comparator)) 250 | 251 | (defun summarize-indexes () 252 | (values 253 | (length *all-entries*) 254 | (summarize-index *by-form-type*) 255 | (summarize-index *by-company-name*) 256 | (summarize-index *by-cik* :comparator #'<))) 257 | 258 | (defun entry-directory (entry) 259 | (nest 260 | (destructuring-bind (form-type company-name cik date pcik id1 year id2) entry 261 | (declare (ignore form-type company-name cik date))) 262 | (format nil "edgar/data/~D/~2,'0D/~10,'0D~2,'0D~6,'0D/" 263 | pcik year id1 year id2))) 264 | 265 | (defun entry-textfile (entry) 266 | (nest 267 | (destructuring-bind (form-type company-name cik date pcik id1 year id2) entry 268 | (declare (ignore form-type company-name cik date pcik))) 269 | (format nil "~10,'0D-~2,'0D-~6,'0D.txt" id1 year id2))) 270 | 271 | (defun lftp-files (directory) 272 | (check-type directory string) 273 | (let ((lines (run/lines 274 | `(lftp ("ftp://" ,*edgar-server*) "-e" 275 | ("cd /" ,directory " && ls ; quit")) 276 | :on-error nil))) 277 | ;;(ematch (first lines) 278 | ;;((ppcre "^cd ok, cwd=/(.*)$" cwd) (assert (equal cwd origin)) (pop lines))) 279 | (loop :for l :in lines 280 | :for skip = (nth-value 1 (split-sequence:split-sequence #\space l :count 8 :remove-empty-subseqs t)) 281 | :collect (subseq l skip)))) 282 | 283 | (defun lftp-get (files) 284 | (run/i `(lftp ("ftp://" ,*edgar-server* "/") "-e" 285 | ("get -c " 286 | ,@(mapcar (lambda (file) (list " " file " -o " file ".tmp")) files) 287 | " ; quit")))) 288 | 289 | (defun in-edgar-base (file &key type) 290 | (subpathname *edgar-base* file :type type)) 291 | 292 | (defun retrieve-entry-files (entry &key (mode :lftp)) 293 | (block nil 294 | (let* ((directory (entry-directory entry)) 295 | (local-dir (in-edgar-base directory :type :directory))) 296 | (when (directory-exists-p local-dir) 297 | (if-let ((local-files (directory-files local-dir))) 298 | (when (loop :for file :in local-files :always 299 | (and (equal (pathname-type file) "xz") 300 | (not (string-suffix-p (pathname-name file) ".tmp")))) 301 | ;; All files already downloaded! 302 | (return (values (mapcar 'pathname-name local-files) nil))))) 303 | (labels 304 | ((in-local-dir (x &key type) 305 | (in-edgar-base (strcat directory x) :type type)) 306 | (body (list-files get-files) 307 | (let* ((files (funcall list-files directory)) 308 | (interesting-files 309 | (remove-if 310 | (let ((textfile (entry-textfile entry))) 311 | (lambda (x) (or (equal x textfile) (string-suffix-p x ".zip")))) 312 | files)) 313 | (new-files 314 | (remove-if 315 | (lambda (x) (file-exists-p (in-local-dir x :type "xz"))) 316 | interesting-files))) 317 | (when new-files 318 | (ignore-errors 319 | (ensure-directories-exist (subpathname *edgar-base* directory)) 320 | (funcall get-files (mapcar (lambda (x) (strcat directory x)) new-files)) 321 | (run/i `("sh" "-c" 322 | ("(xz " 323 | ,@(mapcan 324 | (lambda (x) (list (in-local-dir x :type "tmp") " ")) 325 | new-files) 326 | ,@(mapcan 327 | (lambda (x) (list 328 | " ; mv -f " 329 | (in-local-dir (strcat x ".tmp.xz")) 330 | " " 331 | (in-local-dir x :type "xz"))) 332 | new-files) 333 | ")&"))) 334 | (values interesting-files new-files)))))) 335 | (ecase mode 336 | (:cl-ftp 337 | (with-edgar-ftp (c) 338 | (body (lambda (dir) 339 | (mapcar 'file-namestring (retrieve-filename-list c dir))) 340 | (lambda (files) 341 | (dolist (file files) 342 | (retrieve-file c file (in-edgar-base file :type "tmp"))))))) 343 | (:lftp 344 | (with-current-directory (*edgar-base*) 345 | (body #'lftp-files #'lftp-get)))))))) 346 | 347 | (defparameter *S&P500-2016* 348 | (alist-hash-table 349 | '(("MMM" . 66740) ("ABT" . 1800) ("ABBV" . 1551152) ("ACN" . 1467373) 350 | ("ATVI" . 718877) ("ADBE" . 796343) ("ADT" . 1546640) ("AAP" . 1158449) 351 | ("AES" . 874761) ("AET" . 1122304) ("AFL" . 4977) ("AMG" . 1004434) 352 | ("A" . 1090872) ("GAS" . 1004155) ("APD" . 2969) ("ARG" . 804212) 353 | ("AKAM" . 1086222) ("AA" . 4281) ("AGN" . 1578845) ("ALXN" . 899866) 354 | ("ALLE" . 1579241) ("ADS" . 1101215) ("ALL" . 899051) ("GOOGL" . 1288776) 355 | ("GOOG" . 1288776) ("MO" . 764180) ("AMZN" . 1018724) ("AEE" . 1002910) 356 | ("AAL" . 6201) ("AEP" . 4904) ("AXP" . 4962) ("AIG" . 5272) ("AMT" . 1053507) 357 | ("AWK" . 1410636) ("AMP" . 820027) ("ABC" . 1140859) ("AME" . 1037868) 358 | ("AMGN" . 318154) ("APH" . 820313) ("APC" . 773910) ("ADI" . 6281) 359 | ("AON" . 315293) ("APA" . 6769) ("AIV" . 922864) ("AAPL" . 320193) 360 | ("AMAT" . 6951) ("ADM" . 7084) ("AIZ" . 1267238) ("T" . 732717) 361 | ("ADSK" . 769397) ("ADP" . 8670) ("AN" . 350698) ("AZO" . 866787) 362 | ("AVGO" . 1649338) ("AVB" . 915912) ("AVY" . 8818) ("BHI" . 808362) 363 | ("BLL" . 9389) ("BAC" . 70858) ("BK" . 1390777) ("BCR" . 9892) 364 | ("BXLT" . 1620546) ("BAX" . 10456) ("BBT" . 92230) ("BDX" . 10795) 365 | ("BBBY" . 886158) ("BRK-B" . 1067983) ("BBY" . 764478) ("BIIB" . 875045) 366 | ("BLK" . 1364742) ("HRB" . 12659) ("BA" . 12927) ("BWA" . 908255) 367 | ("BXP" . 1037540) ("BSX" . 885725) ("BMY" . 14272) ("BF-B" . 14693) 368 | ("CHRW" . 1043277) ("CA" . 356028) ("CVC" . 1053112) ("COG" . 858470) 369 | ("CPB" . 16732) ("COF" . 927628) ("CAH" . 721371) ("HSIC" . 1000228) 370 | ("KMX" . 1170010) ("CCL" . 815097) ("CAT" . 18230) ("CBG" . 1138118) 371 | ("CBS" . 813828) ("CELG" . 816284) ("CNC" . 1071739) ("CNP" . 1130310) 372 | ("CTL" . 18926) ("CERN" . 804753) ("CF" . 1324404) ("SCHW" . 316709) 373 | ("CHK" . 895126) ("CVX" . 93410) ("CMG" . 1058090) ("CB" . 896159) 374 | ("CHD" . 313927) ("CI" . 701221) ("XEC" . 1168054) ("CINF" . 20286) 375 | ("CTAS" . 723254) ("CSCO" . 858877) ("C" . 831001) ("CTXS" . 877890) 376 | ("CLX" . 21076) ("CME" . 1156375) ("CMS" . 811156) ("COH" . 1116132) 377 | ("KO" . 21344) ("CCE" . 1491675) ("CTSH" . 1058290) ("CL" . 21665) 378 | ("CPGX" . 1629995) ("CMCSA" . 1166691) ("CMA" . 28412) ("CAG" . 23217) 379 | ("CXO" . 1358071) ("COP" . 1163165) ("ED" . 1047862) ("STZ" . 16918) 380 | ("GLW" . 24741) ("COST" . 909832) ("CCI" . 1051470) ("CSRA" . 1646383) 381 | ("CSX" . 277948) ("CMI" . 26172) ("CVS" . 64803) ("DHI" . 882184) 382 | ("DHR" . 313616) ("DRI" . 940944) ("DVA" . 927066) ("DE" . 315189) 383 | ("DLPH" . 1521332) ("DAL" . 27904) ("XRAY" . 818479) ("DVN" . 1090012) 384 | ("DO" . 949039) ("DFS" . 1393612) ("DISCA" . 1437107) ("DISCK" . 1437107) 385 | ("DG" . 29534) ("DLTR" . 935703) ("D" . 715957) ("DOV" . 29905) 386 | ("DOW" . 29915) ("DPS" . 1418135) ("DTE" . 936340) ("DD" . 30554) 387 | ("DUK" . 1326160) ("DNB" . 1115222) ("ETFC" . 1015780) ("EMN" . 915389) 388 | ("ETN" . 1551182) ("EBAY" . 1065088) ("ECL" . 31462) ("EIX" . 827052) 389 | ("EW" . 1099800) ("EA" . 712515) ("EMC" . 790070) ("EMR" . 32604) 390 | ("ENDP" . 1593034) ("ETR" . 65984) ("EOG" . 821189) ("EQT" . 33213) 391 | ("EFX" . 33185) ("EQIX" . 1101239) ("EQR" . 906107) ("ESS" . 920522) 392 | ("EL" . 1001250) ("ES" . 72741) ("EXC" . 1109357) ("EXPE" . 1324424) 393 | ("EXPD" . 746515) ("ESRX" . 1532063) ("EXR" . 1289490) ("XOM" . 34088) 394 | ("FFIV" . 1048695) ("FB" . 1326801) ("FAST" . 815556) ("FRT" . 34903) 395 | ("FDX" . 1048911) ("FIS" . 1136893) ("FITB" . 35527) ("FSLR" . 1274494) 396 | ("FE" . 1031296) ("FISV" . 798354) ("FLIR" . 354908) ("FLS" . 30625) 397 | ("FLR" . 1124198) ("FMC" . 37785) ("FTI" . 1135152) ("FL" . 850209) 398 | ("F" . 37996) ("BEN" . 38777) ("FCX" . 831259) ("FTR" . 20520) 399 | ("GME" . 1326380) ("GPS" . 39911) ("GRMN" . 1121788) ("GD" . 40533) 400 | ("GE" . 40545) ("GGP" . 1496048) ("GIS" . 40704) ("GM" . 1467858) 401 | ("GPC" . 40987) ("GILD" . 882095) ("GS" . 886982) ("GT" . 42582) 402 | ("GWW" . 277135) ("HAL" . 45012) ("HBI" . 1359841) ("HOG" . 793952) 403 | ("HAR" . 800459) ("HRS" . 202058) ("HIG" . 874766) ("HAS" . 46080) 404 | ("HCA" . 860730) ("HCP" . 765880) ("HP" . 46765) ("HES" . 4447) 405 | ("HPE" . 1645590) ("HOLX" . 859737) ("HD" . 354950) ("HON" . 773840) 406 | ("HRL" . 48465) ("HST" . 1070750) ("HPQ" . 47217) ("HUM" . 49071) 407 | ("HBAN" . 49196) ("ITW" . 49826) ("ILMN" . 1110803) ("IR" . 1466258) 408 | ("INTC" . 50863) ("ICE" . 1571949) ("IBM" . 51143) ("IP" . 51434) 409 | ("IPG" . 51644) ("IFF" . 51253) ("INTU" . 896878) ("ISRG" . 1035267) 410 | ("IVZ" . 914208) ("IRM" . 1020569) ("JEC" . 52988) ("JBHT" . 728535) 411 | ("JNJ" . 200406) ("JCI" . 53669) ("JPM" . 19617) ("JNPR" . 1043604) 412 | ("KSU" . 54480) ("K" . 55067) ("KEY" . 91576) ("KMB" . 55785) ("KIM" . 879101) 413 | ("KMI" . 1506307) ("KLAC" . 319201) ("KSS" . 885639) ("KHC" . 1637459) 414 | ("KR" . 56873) ("LB" . 701985) ("LLL" . 1056239) ("LH" . 920148) 415 | ("LRCX" . 707549) ("LM" . 704051) ("LEG" . 58492) ("LEN" . 920760) 416 | ("LVLT" . 794323) ("LUK" . 96223) ("LLY" . 59478) ("LNC" . 59558) 417 | ("LLTC" . 791907) ("LMT" . 936468) ("L" . 60086) ("LOW" . 60667) 418 | ("LYB" . 1489393) ("MTB" . 36270) ("MAC" . 912242) ("M" . 794367) 419 | ("MNK" . 1567892) ("MRO" . 101778) ("MPC" . 1510295) ("MAR" . 1048286) 420 | ("MMC" . 62709) ("MLM" . 916076) ("MAS" . 62996) ("MA" . 1141391) 421 | ("MAT" . 63276) ("MKC" . 63754) ("MCD" . 63908) ("MHFI" . 64040) 422 | ("MCK" . 927653) ("MJN" . 1452575) ("WRK" . 1636023) ("MDT" . 1613103) 423 | ("MRK" . 310158) ("MET" . 1099219) ("KORS" . 1530721) ("MCHP" . 827054) 424 | ("MU" . 723125) ("MSFT" . 789019) ("MHK" . 851968) ("TAP" . 24545) 425 | ("MDLZ" . 1103982) ("MON" . 1110783) ("MNST" . 865752) ("MCO" . 1059556) 426 | ("MS" . 895421) ("MOS" . 1285785) ("MSI" . 68505) ("MUR" . 717423) 427 | ("MYL" . 1623613) ("NDAQ" . 1120193) ("NOV" . 1021860) ("NAVI" . 1593538) 428 | ("NTAP" . 1002047) ("NFLX" . 1065280) ("NWL" . 814453) ("NFX" . 912750) 429 | ("NEM" . 1164727) ("NWSA" . 1564708) ("NWS" . 1564708) ("NEE" . 753308) 430 | ("NLSN" . 1492633) ("NKE" . 320187) ("NI" . 1111711) ("NBL" . 72207) 431 | ("JWN" . 72333) ("NSC" . 702165) ("NTRS" . 73124) ("NOC" . 1133421) 432 | ("NRG" . 1013871) ("NUE" . 73309) ("NVDA" . 1045810) ("ORLY" . 898173) 433 | ("OXY" . 797468) ("OMC" . 29989) ("OKE" . 1039684) ("ORCL" . 1341439) 434 | ("OI" . 812074) ("PCAR" . 75362) ("PH" . 76334) ("PDCO" . 891024) 435 | ("PAYX" . 723531) ("PYPL" . 1633917) ("PNR" . 77360) ("PBCT" . 1378946) 436 | ("PEP" . 77476) ("PKI" . 31791) ("PRGO" . 1585364) ("PFE" . 78003) 437 | ("PCG" . 1004980) ("PM" . 1413329) ("PSX" . 1534701) ("PNW" . 764622) 438 | ("PXD" . 1038357) ("PBI" . 78814) ("PNC" . 713676) ("RL" . 1037038) 439 | ("PPG" . 79879) ("PPL" . 922224) ("PX" . 884905) ("CFG" . 759944) 440 | ("PCLN" . 1075531) ("PFG" . 1126328) ("PG" . 80424) ("PGR" . 80661) 441 | ("PLD" . 1045609) ("PRU" . 1137774) ("PEG" . 788784) ("PSA" . 1393311) 442 | ("PHM" . 822416) ("PVH" . 78239) ("QRVO" . 1604778) ("PWR" . 1050915) 443 | ("QCOM" . 804328) ("DGX" . 1022079) ("RRC" . 315852) ("RTN" . 1047122) 444 | ("O" . 726728) ("RHT" . 1087423) ("REGN" . 872589) ("RF" . 1281761) 445 | ("RSG" . 1060391) ("RAI" . 1275283) ("RHI" . 315213) ("ROK" . 1024478) 446 | ("COL" . 1137411) ("ROP" . 882835) ("ROST" . 745732) ("RCL" . 884887) 447 | ("R" . 85961) ("CRM" . 1108524) ("SNDK" . 1000180) ("SCG" . 754737) 448 | ("SLB" . 87347) ("SNI" . 1430602) ("STX" . 1137789) ("SEE" . 1012100) 449 | ("SRE" . 1032208) ("SHW" . 89800) ("SIG" . 832988) ("SPG" . 1063761) 450 | ("SWKS" . 4127) ("SLG" . 1040971) ("SJM" . 91419) ("SNA" . 91440) 451 | ("SO" . 92122) ("LUV" . 92380) ("SWN" . 7332) ("SE" . 1373835) 452 | ("STJ" . 203077) ("SWK" . 93556) ("SPLS" . 791519) ("SBUX" . 829224) 453 | ("HOT" . 316206) ("STT" . 93751) ("SRCL" . 861878) ("SYK" . 310764) 454 | ("STI" . 750556) ("SYMC" . 849399) ("SYF" . 1601712) ("SYY" . 96021) 455 | ("TROW" . 1113169) ("TGT" . 27419) ("TEL" . 1385157) ("TE" . 350563) 456 | ("TGNA" . 39899) ("TDC" . 816761) ("TSO" . 50104) ("TXN" . 97476) 457 | ("TXT" . 217346) ("HSY" . 47111) ("TRV" . 86312) ("TMO" . 97745) 458 | ("TIF" . 98246) ("TWX" . 1105705) ("TWC" . 1377013) ("TJX" . 109198) 459 | ("TMK" . 320335) ("TSS" . 721683) ("TSCO" . 916365) ("RIG" . 1451505) 460 | ("TRIP" . 1526520) ("FOXA" . 1308161) ("FOX" . 1308161) ("TSN" . 100493) 461 | ("TYC" . 833444) ("UDR" . 74208) ("ULTA" . 1403568) ("USB" . 36104) 462 | ("UA" . 1336917) ("UNP" . 100885) ("UAL" . 100517) ("UNH" . 731766) 463 | ("UPS" . 1090727) ("URI" . 1067701) ("UTX" . 101829) ("UHS" . 352915) 464 | ("UNM" . 5513) ("URBN" . 912615) ("VFC" . 103379) ("VLO" . 1035002) 465 | ("VAR" . 203527) ("VTR" . 740260) ("VRSN" . 1014473) ("VRSK" . 1442145) 466 | ("VZ" . 732712) ("VRTX" . 875320) ("VIAB" . 1339947) ("V" . 1403161) 467 | ("VNO" . 899689) ("VMC" . 1396009) ("WMT" . 104169) ("WBA" . 1618921) 468 | ("DIS" . 1001039) ("WM" . 823768) ("WAT" . 1000697) ("ANTM" . 1156039) 469 | ("WFC" . 72971) ("HCN" . 766704) ("WDC" . 106040) ("WU" . 1365135) 470 | ("WY" . 106535) ("WHR" . 106640) ("WFM" . 865436) ("WMB" . 107263) 471 | ("WLTW" . 1140536) ("WEC" . 783325) ("WYN" . 1361658) ("WYNN" . 1174922) 472 | ("XEL" . 72903) ("XRX" . 108772) ("XLNX" . 743988) ("XL" . 875159) 473 | ("XYL" . 1524472) ("YHOO" . 1011006) ("YUM" . 1041061) ("ZBH" . 1136869) 474 | ("ZION" . 109380) ("ZTS" . 1555280)) 475 | :test 'equal)) 476 | 477 | (defun reverse-hash-table (table &key test) 478 | (alist-hash-table 479 | (mapcar (lambda (x) (cons (cdr x) (car x))) (hash-table-alist table)) 480 | :test test)) 481 | 482 | (defparameter *cik-sp500* 483 | (reverse-hash-table *S&P500-2016* :test 'equal)) 484 | 485 | ;; NB: The tickers were retrieved with a python program using finsymbols. 486 | ;; The CIKs were retrieved with this function, using drakma: 487 | #| 488 | (defun cik-from-ticker (ticker) 489 | (check-type ticker string) 490 | (let* ((ticker0 (remove #\- ticker)) 491 | (result (http-request "http://www.sec.gov/cgi-bin/browse-edgar" ;; uses drakma: 492 | :parameters `(("CIK" . ,ticker0) ("action" . "getcompany"))))) 493 | (match result 494 | ((ppcre "CIK=([0-9]{10})&" cik) (values (parse-integer cik)))))) 495 | |# 496 | 497 | (defun entry-sp500-p (entry) 498 | (gethash (third entry) *cik-sp500*)) 499 | 500 | (defun read-sp500-index () 501 | (read-indexes (get-all-compressed-form-indexes) :filter 'entry-sp500-p)) 502 | 503 | (defun coerce-to-cik (x) 504 | (etypecase x 505 | (keyword (values (gethash (symbol-name x) *S&P500-2016*))) 506 | (string (values (gethash x *S&P500-2016*))) 507 | (integer x))) 508 | 509 | (defun retrieve-cik-files (x) 510 | (map () 'retrieve-entry-files (gethash (coerce-to-cik x) *by-cik*))) 511 | 512 | (defun retrieve-sp500-files () 513 | (loop :for cik :in (hash-table-values *S&P500-2016*) :do (retrieve-cik-files cik))) 514 | 515 | (defun number-of-digits (n) 516 | ;; This function works well enough for all the numbers we care about, 517 | ;; but let's not let it silently give the wrong answer. 518 | (assert (< (integer-length (integer-length n)) 50)) ;; NB: a double's mantissa is 53 bits long. 519 | (ceiling (log (+ 1 n) 10d0))) 520 | 521 | (defun n-out-of-m (n m &optional s) 522 | (with-output (s) 523 | (format s "[~v,' D/~D]" (number-of-digits m) n m))) 524 | 525 | (defun retrieve-entries (l) 526 | (loop 527 | :with ll = (length l) 528 | :for i :from 0 529 | :for e :in l :do 530 | (format t "~A ~A~%" (n-out-of-m i ll) (entry-directory e)) 531 | (retrieve-entry-files e))) 532 | 533 | (defun retrieve-10k-files () 534 | (retrieve-entries (gethash "10-K" *by-form-type*))) 535 | 536 | (defun retrieve-google-files () 537 | (retrieve-entries (gethash (coerce-to-cik :goog) *by-cik*))) 538 | -------------------------------------------------------------------------------- /vicode.lisp: -------------------------------------------------------------------------------- 1 | ;;; -*- mode: Lisp; coding: utf-8 -*- 2 | ;;; vicode - support for computing in vietnamese 3 | ;;; 4 | ;;; This contains code as translated from from my original perl script vncode 5 | ;;; Copyright (c) 1996-1998, 2016 François-René "Faré" Rideau DDan(.ng-Vu~ Ba^n 6 | ;;; 7 | ;;; TODO: Use byte streams instead of character pseudo-streams assuming 8 | ;;; one-byte character and/or abusing the latin1 encoding. 9 | 10 | (uiop:define-package :fare-scripts/vicode 11 | (:use :common-lisp :uiop :fare-utils :optima :optima.ppcre :cl-unicode) 12 | (:export 13 | #:*viet-dict* #:split-viet-dict-file #:dict-key #:run-sort-program 14 | #:sort-viet-dict 15 | #:init-vichar-tables 16 | #:viletter-from-vichar #:full-viletter-from-full-vichar 17 | #:vichar-from-viletter #:full-vichar-from-full-viletter 18 | #:new-viletter-tables #:full-viletter-get 19 | #:full-viletter-from-viqr #:full-vichar-from-viqr 20 | #:viqr-from-full-viletter 21 | #:ascii-vowel-p #:full-vichar-simple-p 22 | #:*viqr-quote* 23 | #:init-viqr-tables #:get-viqr-char #:put-viqr-char #:viqr-from-full-vichar 24 | #:init-viscii11-tables #:get-viscii11-char #:put-viscii11-char #:viscii11-from-full-vichar 25 | #:init-viscii10-tables #:get-viscii10-char #:put-viscii10-char 26 | #:init-vietword-tables #:get-vietword-char #:put-vietword-char 27 | #:init-unicode-tables #:get-unicode-char #:put-unicode-char 28 | #:init-ndn-tables #:get-ndn-char #:put-ndn-char 29 | #:init-sort-tables #:get-sort-char #:put-sort-char 30 | #:vnsort #:vi-sortable-string #:transcode 31 | #:unicode-from-viqr #:viqr-from-unicode 32 | )) 33 | (in-package :fare-scripts/vicode) 34 | 35 | (declaim (optimize (speed 0) (safety 3) (debug 3))) 36 | 37 | ;;; We want to keep this file sorted, in particular 38 | (defparameter *viet-dict* 39 | (subpathname (user-homedir-pathname) "fare/scratch/viet")) 40 | 41 | ;;; The file has two sections; only the first one needs to be sorted; this splits them. 42 | (defun split-viet-dict-file () 43 | (let* ((all (with-input-file (i *viet-dict*) (slurp-stream-lines i))) 44 | (end-dict (position-if (lambda (line) (string-prefix-p "----" line)) all))) 45 | (values (subseq all 0 end-dict) (subseq all end-dict)))) 46 | 47 | ;;; This extracts the key that needs to be sorted in Vietnamese dictionary order 48 | (defun dict-key (line) 49 | (match line 50 | ((ppcre "[0-5] [0-5] ([(][^()]+[)] )*([^()=]+) ([(][^()]+[)] )*= .*" _ word _) 51 | word) 52 | (_ 53 | (error "Invalid line: ~A~%" line)))) 54 | 55 | ;;; First attempt: use the glibc unicode support for collation. 56 | (defun run-sort-program (entries &key key sort) 57 | (let* ((vector (coerce entries 'vector)) 58 | (len (length vector)) 59 | (klen (ceiling (log len 10))) 60 | (keyfun (or key #'identity))) 61 | (labels ((prepare-input (s) 62 | (loop :for line :across vector 63 | :for i :from 0 64 | :do (format s "~v,'0D ~A~%" klen i (funcall keyfun line)))) 65 | (process-output (s) 66 | (loop :for line = (read-line s nil nil) 67 | :for i = (when line (parse-integer line :junk-allowed t)) 68 | :while line 69 | :collect (aref vector i)))) 70 | (run-program (or sort '("sort" "-sk2")) 71 | :input #'prepare-input 72 | :output #'process-output 73 | :error-output t)))) 74 | 75 | ;; Problem: it doesn't sort word by word by collates all words into one before it sorts!!!!! 76 | (defun sort-with-libc (lines &key (key #'identity)) 77 | (run-sort-program lines :key key :sort `("/usr/bin/env" "LC_ALL=vi_VN.UTF-8" "sort" "-sk2"))) 78 | 79 | (defun sort-with-vicode (lines &key (key #'identity)) 80 | (sort (copy-list lines) 'string< :key (lambda (line) (vi-sortable-string (funcall key line))))) 81 | 82 | (defun sort-viet-dict () 83 | (multiple-value-bind (words appendix) (split-viet-dict-file) 84 | (format t "~{~A~%~}" 85 | (append (sort-with-vicode words :key 'dict-key) appendix)))) 86 | 87 | 88 | ;;;; ENCODING DATABASE 89 | 90 | ;; The VIQR/VISCII 1.1 table was extracted from Trichlor's report with jed 91 | ;; The VIETWORD table was extracted and intermixed 92 | ;; from an earlier 8088 asm program with perl 93 | ;; For NDN's Tintuc encoding, see below 94 | 95 | (defparameter *vi-table* 96 | #(;; VIQR 1.1, Unicode, VISCII 1.1, VIETWORD 1.10, TCVN 97 | #(#("A" #x41 #x41 #x41) 98 | #("A`" #xc0 #x60 -1) 99 | #("A?" #xc4 #x7c -1) 100 | #("A~" #xc3 #x7d -1) 101 | #("A'" #xc1 #x5e -1) 102 | #("A." #x80 #x7e -1)) 103 | #(#("A(" #xc5 #xfc #xa1) 104 | #("A(`" #x82 #xb1 -1) 105 | #("A(?" #x02 #xb2 -1) 106 | #("A(~" #x05 #xb3 -1) 107 | #("A('" #x81 #xb0 -1) 108 | #("A(." #x83 #x7b -1)) 109 | #(#("A^" #xc2 #xfd #xa2) 110 | #("A^`" #x85 #x87 -1) 111 | #("A^?" #x86 #x8c -1) 112 | #("A^~" #x06 #x90 -1) 113 | #("A^'" #x84 #x80 -1) 114 | #("A^." #x87 #xaf -1)) 115 | #(#("E" #x45 #x45 #x45) 116 | #("E`" #xc8 #xb5 -1) 117 | #("E?" #xcb #xb6 -1) 118 | #("E~" #x88 #xb7 -1) 119 | #("E'" #xc9 #xb4 -1) 120 | #("E." #x89 #xb8 -1)) 121 | #(#("E^" #xca #xfe #xa3) 122 | #("E^`" #x8b #xba -1) 123 | #("E^?" #x8c #xbb -1) 124 | #("E^~" #x8d #xbc -1) 125 | #("E^'" #x8a #xb9 -1) 126 | #("E^." #x8e #xbd -1)) 127 | #(#("I" #x49 #x49 #x49) 128 | #("I`" #xcc #xbf -1) 129 | #("I?" #x9b #xc0 -1) 130 | #("I~" #xce #xc1 -1) 131 | #("I'" #xcd #xbe -1) 132 | #("I." #x98 #xc2 -1)) 133 | #(#("O" #x4f #x4f #x4f) 134 | #("O`" #xd2 #xc4 -1) 135 | #("O?" #x99 #xc5 -1) 136 | #("O~" #xa0 #xc6 -1) 137 | #("O'" #xd3 #xc3 -1) 138 | #("O." #x9a #xc7 -1)) 139 | #(#("O^" #xd4 #xf9 #xa4) 140 | #("O^`" #x90 #xc9 -1) 141 | #("O^?" #x91 #xca -1) 142 | #("O^~" #x92 #xcb -1) 143 | #("O^'" #x8f #xc8 -1) 144 | #("O^." #x93 #xcc -1)) 145 | #(#("O+" #xb4 #xfa #xa5) 146 | #("O+`" #x96 #xce #xea) 147 | #("O+?" #x97 #xcf -1) 148 | #("O+~" #xb3 #xd0 -1) 149 | #("O+'" #x95 #xcd -1) 150 | #("O+." #x94 #xd1 -1)) 151 | #(#("U" #x55 #x55 #x55) 152 | #("U`" #xd9 #xd3 -1) 153 | #("U?" #x9c #xd4 -1) 154 | #("U~" #x9d #xd5 -1) 155 | #("U'" #xda #xd2 -1) 156 | #("U." #x9e #xd6 -1)) 157 | #(#("U+" #xbf #xfb #xa6) 158 | #("U+`" #xbb #xd8 -1) 159 | #("U+?" #xbc #xd9 -1) 160 | #("U+~" #xff #xda -1) 161 | #("U+'" #xba #xd7 -1) 162 | #("U+." #xb9 #xdb -1)) 163 | #(#("Y" #x59 #x59 #x59) 164 | #("Y`" #x9f #xdd -1) 165 | #("Y?" #x14 #xde #x14) 166 | #("Y~" #x19 #xdf #x19) 167 | #("Y'" #xdd #xdc -1) 168 | #("Y." #x1e #x40 -1)) 169 | #(#("a" #x61 #x61 #x61) 170 | #("a`" #xe0 #x85 #xb5) 171 | #("a?" #xe4 #xe0 #xb6) 172 | #("a~" #xe3 #xe1 #xb7) 173 | #("a'" #xe1 #xa0 #xb8) 174 | #("a." #xd5 #xe2 #xb9)) 175 | #(#("a(" #xe5 #xe8 #xa8) 176 | #("a(`" #xa2 #xea #xbb) 177 | #("a(?" #xc6 #xeb #xbc) 178 | #("a(~" #xc7 #xec #xbd) 179 | #("a('" #xa1 #xe9 #xbe) 180 | #("a(." #xa3 #xed #xc6)) 181 | #(#("a^" #xe2 #x83 #xa9) 182 | #("a^`" #xa5 #xe4 #xc7) 183 | #("a^?" #xa6 #xe5 #xc8) 184 | #("a^~" #xe7 #xe6 #xc9) 185 | #("a^'" #xa4 #xe3 #xca) 186 | #("a^." #xa7 #xe7 #xcb)) 187 | #(#("e" #x65 #x65 #x65) 188 | #("e`" #xe8 #x8a #xcc) 189 | #("e?" #xeb #x81 -1) 190 | #("e~" #xa8 #x84 #xcf) 191 | #("e'" #xe9 #x82 #xd0) 192 | #("e." #xa9 #x86 #xd1)) 193 | #(#("e^" #xea #x88 #xaa) 194 | #("e^`" #xab #x8b #xd2) 195 | #("e^?" #xac #x8e #xd3) 196 | #("e^~" #xad #x8f #xd4) 197 | #("e^'" #xaa #x89 #xd5) 198 | #("e^." #xae #xac #xd6)) 199 | #(#("i" #x69 #x69 #x69) 200 | #("i`" #xec #x8d #xd7) 201 | #("i?" #xef #xa9 #xd8) 202 | #("i~" #xee #xaa #xdc) 203 | #("i'" #xed #xa1 #xdd) 204 | #("i." #xb8 #xab -1)) 205 | #(#("o" #x6f #x6f #x6f) 206 | #("o`" #xf2 #x95 #xdf) 207 | #("o?" #xf6 #x91 #xe1) 208 | #("o~" #xf5 #x92 #xe2) 209 | #("o'" #xf3 #xa2 #xe3) 210 | #("o." #xf7 #x94 #xe4)) 211 | #(#("o^" #xf4 #x93 #xab) 212 | #("o^`" #xb0 #x98 #xe5) 213 | #("o^?" #xb1 #x99 #xe6) 214 | #("o^~" #xb2 #x9a #xe7) 215 | #("o^'" #xaf #x96 #xe8) 216 | #("o^." #xb5 #x9b #xe9)) 217 | #(#("o+" #xbd #xf3 #xac) 218 | #("o+`" #xb6 #xf5 -1) 219 | #("o+?" #xb7 #xf6 #xeb) 220 | #("o+~" #xde #xf7 #xec) 221 | #("o+'" #xbe #xf4 #xed) 222 | #("o+." #xfe #xf8 #xee)) 223 | #(#("u" #x75 #x75 #x75) 224 | #("u`" #xf9 #x97 #xef) 225 | #("u?" #xfc #x9c #xf1) 226 | #("u~" #xfb #x9d #xf2) 227 | #("u'" #xfa #xa3 #xf3) 228 | #("u." #xf8 #x9e #xf4)) 229 | #(#("u+" #xdf #x9f #xad) 230 | #("u+`" #xd7 #xa5 #xf5) 231 | #("u+?" #xd8 #xa6 #xf6) 232 | #("u+~" #xe6 #xa7 #xf7) 233 | #("u+'" #xd1 #xa4 #xf8) 234 | #("u+." #xf1 #xa8 #xf9)) 235 | #(#("y" #x79 #x79 #x79) 236 | #("y`" #xcf #xef #xfa) 237 | #("y?" #xd6 #xf0 #xfb) 238 | #("y~" #xdb #xf1 #xfc) 239 | #("y'" #xfd #xee #xfd) 240 | #("y." #xdc #xf2 #xfe)) 241 | #(#("D" #x44 #x44 #x44)) 242 | #(#("d" #x64 #x64 #x64)) 243 | #(#("DD" #xd0 #xae #xa7)) 244 | #(#("dd" #xf0 #xad #xae)))) 245 | 246 | ;; VISCII 1.1 to Unicode translation... 247 | ;; From cuong@haydn.Stanford.EDU (Cuong T. Nguyen) 248 | (defparameter *viscii11-to-unicode* 249 | #(#x0000 #x0001 #x1eb2 #x0003 #x0004 #x1eb4 #x1eaa #x0007 250 | #x0008 #x0009 #x000a #x000b #x000c #x000d #x000e #x000f 251 | #x0010 #x0011 #x0012 #x0013 #x1ef6 #x0015 #x0016 #x0017 252 | #x0018 #x1ef8 #x001a #x001b #x001c #x001d #x1ef4 #x001f 253 | #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 254 | #x0028 #x0029 #x002a #x002b #x002c #x002d #x002e #x002f 255 | #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 256 | #x0038 #x0039 #x003a #x003b #x003c #x003d #x003e #x003f 257 | #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 258 | #x0048 #x0049 #x004a #x004b #x004c #x004d #x004e #x004f 259 | #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 260 | #x0058 #x0059 #x005a #x005b #x005c #x005d #x005e #x005f 261 | #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 262 | #x0068 #x0069 #x006a #x006b #x006c #x006d #x006e #x006f 263 | #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 264 | #x0078 #x0079 #x007a #x007b #x007c #x007d #x007e #x007f 265 | #x1ea0 #x1eae #x1eb0 #x1eb6 #x1ea4 #x1ea6 #x1ea8 #x1eac 266 | #x1ebc #x1eb8 #x1ebe #x1ec0 #x1ec2 #x1ec4 #x1ec6 #x1ed0 267 | #x1ed2 #x1ed4 #x1ed6 #x1ed8 #x1ee2 #x1eda #x1edc #x1ede 268 | #x1eca #x1ece #x1ecc #x1ec8 #x1ee6 #x0168 #x1ee4 #x1ef2 269 | #x00d5 #x1eaf #x1eb1 #x1eb7 #x1ea5 #x1ea7 #x1ea9 #x1ead 270 | #x1ebd #x1eb9 #x1ebf #x1ec1 #x1ec3 #x1ec5 #x1ec7 #x1ed1 271 | #x1ed3 #x1ed5 #x1ed7 #x1ee0 #x01a0 #x1ed9 #x1edd #x1edf 272 | #x1ecb #x1ef0 #x1ee8 #x1eea #x1eec #x01a1 #x1edb #x01af 273 | #x00c0 #x00c1 #x00c2 #x00c3 #x1ea2 #x0102 #x1eb3 #x1eb5 274 | #x00c8 #x00c9 #x00ca #x1eba #x00cc #x00cd #x0128 #x1ef3 275 | #x0110 #x1ee9 #x00d2 #x00d3 #x00d4 #x1ea1 #x1ef7 #x1eeb 276 | #x1eed #x00d9 #x00da #x1ef9 #x1ef5 #x00dd #x1ee1 #x01b0 277 | #x00e0 #x00e1 #x00e2 #x00e3 #x1ea3 #x0103 #x1eef #x1eab 278 | #x00e8 #x00e9 #x00ea #x1ebb #x00ec #x00ed #x0129 #x1ec9 279 | #x0111 #x1ef1 #x00f2 #x00f3 #x00f4 #x00f5 #x1ecf #x1ecd 280 | #x1ee5 #x00f9 #x00fa #x0169 #x1ee7 #x00fd #x1ee3 #x1eee)) 281 | 282 | ;;; Hand-written database 283 | 284 | (defparameter *viqr-vowels-uc* '("A" "A(" "A^" "E" "E^" "I" "O" "O^" "O+" "U" "U+" "Y")) 285 | (defparameter *viqr-vowels-lc* (mapcar 'string-downcase *viqr-vowels-uc*)) 286 | (defparameter *n-vi-vowels* (length *viqr-vowels-uc*)) 287 | (defparameter *viqr-vowels* `#(,@*viqr-vowels-uc* ,@*viqr-vowels-lc*)) 288 | (defparameter *viqr-letters* `#(,@*viqr-vowels-uc* ,@*viqr-vowels-lc* "D" "d" "DD" "dd")) 289 | (defparameter *n-vi-letters* (length *viqr-letters*)) 290 | (defparameter *viqr-accents* #("" "`" "?" "~" "'" ".")) ;; nationalist order: '`?~. 291 | (defparameter *n-vi-accents* (length *viqr-accents*)) 292 | 293 | ;;; In case we want to support telex mode someday... 294 | (defparameter *telex-vowels-uc* #("A" "AW" "AA" "E" "EE" "I" "O" "OO" "OW" "U" "UW" "Y")) 295 | (defparameter *telex-accents* #("" "S" "F" "R" "X" "J")) 296 | 297 | (defparameter *vnencodings* 298 | '((("VIQR 1.1" "VIQR11" "Q11" "VIQR 1.0" "VIQR10" "Q10" "VIQR" "Q") 299 | init-viqr-tables 300 | get-viqr-char 301 | put-viqr-char) 302 | (("VISCII 1.1" "VISCII11" "V11" "V1" "1" "VISCII" "V") 303 | init-viscii11-tables 304 | get-viscii11-char 305 | put-viscii11-char) 306 | (("VIETWORD" "VW" "W") 307 | init-vietword-tables 308 | get-vietword-char 309 | put-vietword-char) 310 | (("NDN" "N" "TINTUC") 311 | init-ndn-tables 312 | get-ndn-char 313 | put-ndn-char) 314 | (("SORT" "S" "VNSORT" "SORTVN") 315 | init-sort-tables 316 | get-sort-char 317 | put-sort-char) 318 | (("VISCII 1.0" "VISCII10" "V10" "V0" "0") 319 | init-viscii10-tables 320 | get-viscii10-char 321 | put-viscii10-char) 322 | (("UNICODE" "UTF-8" "UTF8" "ISO10646" "10646" "UNI" "U") 323 | init-unicode-tables 324 | get-utf8-char 325 | put-utf8-char))) 326 | 327 | (defparameter *vn-encoding-table* 328 | (let ((table (make-hash-table :test 'equal))) 329 | (loop :for encoding :in *vnencodings* :do 330 | (loop :for name :in (first encoding) :do 331 | (setf (gethash name table) encoding))))) 332 | 333 | 334 | ;;; Internal Tables 335 | 336 | ;; A "viletter" is an index in the above list of letters. 337 | ;; 338 | ;; A "full-viletter" is a pair of a viletter and an accent number (or 0 if not a vi vowel). 339 | ;; 340 | ;; A "vichar" is a pair of two things: an ASCII character and 341 | ;; the index number of the vietnamese letter among those whose VIQR encoding begins 342 | ;; with the given ASCII character if there are many, or NIL if there is only one. 343 | ;; 344 | ;; A "full-vichar" is a pair of a pair of a vichar and the 345 | ;; index number of the vietnamese accent used if the letter is a vowel, or 0 if not. 346 | 347 | ;;; Internal Database for letter/number <-> char association 348 | 349 | (defparameter *viletter-to-vichar* nil 350 | "A table, for each viletter, of the corresponding vichar") 351 | (defparameter *vichar-to-viletter* nil 352 | "A table, for every ASCII char that starts a vichar, the viletters") 353 | (defparameter *muletter* nil 354 | "Table that to every index of viletter associates the index of the viletter with ^ if defined.") 355 | (defparameter *nomuletter* nil 356 | "Table that to every index of viletter with ^ associates the index of the viletter without ^.") 357 | 358 | (defun make-extensible-vector (&rest keys) 359 | (apply 'make-array '(0) :adjustable t :fill-pointer t keys)) 360 | 361 | (defun init-vichar-tables () 362 | (setf *viletter-to-vichar* (make-array (list *n-vi-letters*)) 363 | *vichar-to-viletter* (make-array '(128) :initial-element nil) 364 | *muletter* (make-hash-table :test 'equal) 365 | *nomuletter* (make-hash-table :test 'equal)) 366 | (loop :for vowel :across "aeoAEO" 367 | :for vl-naked = (position (string vowel) *viqr-letters* :test 'equal) 368 | :for vl-mu = (position (strcat vowel "^") *viqr-letters* :test 'equal) :do 369 | (setf (gethash vl-naked *muletter*) vl-mu) 370 | (setf (gethash vl-mu *nomuletter*) vl-naked)) 371 | (loop 372 | :for viletter :from 0 ; index of the viletter 373 | :for viqr :across *viqr-letters* ; VIQR for the viletter 374 | :for char = (aref viqr 0) ; first char 375 | :for code = (char-code char) ; ASCII code for the first char 376 | :for () = (unless (aref *vichar-to-viletter* code) 377 | (setf (aref *vichar-to-viletter* code) (make-extensible-vector))) 378 | :for num = (fill-pointer (aref *vichar-to-viletter* code)) :do 379 | (setf (aref *viletter-to-vichar* viletter) (cons char num)) 380 | (vector-push-extend viletter (aref *vichar-to-viletter* code)))) 381 | 382 | (defun viletter-from-vichar (vichar) 383 | (destructuring-bind (char . num) vichar 384 | (when num 385 | (aref (aref *vichar-to-viletter* (char-code char)) num)))) 386 | 387 | (defun full-viletter-from-full-vichar (full-vichar) 388 | (destructuring-bind (vichar . accent) full-vichar 389 | (cons (viletter-from-vichar vichar) accent))) 390 | 391 | (defun vichar-from-viletter (viletter) 392 | (aref *viletter-to-vichar* viletter)) 393 | 394 | (defun full-vichar-from-full-viletter (full-viletter) 395 | (destructuring-bind (viletter . accent) full-viletter 396 | (cons (vichar-from-viletter viletter) accent))) 397 | 398 | (defun full-viletter-get (table-position full-viletter) 399 | (destructuring-bind (viletter . accent) full-viletter 400 | (aref (aref (aref *vi-table* viletter) accent) table-position))) 401 | 402 | 403 | ;;; Understanding VIQR characters in the hand-written database 404 | (defun full-viletter-from-viqr (viqr) 405 | (let* ((accent-index (position-if (lambda (accent) (string-suffix-p viqr accent)) 406 | *viqr-accents* :from-end t)) 407 | (accent (aref *viqr-accents* accent-index)) 408 | (letter (subseq viqr 0 (- (length viqr) (length accent)))) 409 | (letter-index (position letter *viqr-letters* :test 'equal))) 410 | (cons letter-index accent-index))) 411 | 412 | (defun full-vichar-from-viqr (viqr) 413 | (destructuring-bind (viletter . accent) (full-viletter-from-viqr viqr) 414 | (cons (if viletter (vichar-from-viletter viletter) (cons (first-char viqr) nil)) 415 | accent))) 416 | 417 | (defparameter *viqr-position* 0) 418 | 419 | (defun viqr-from-full-viletter (full-viletter) 420 | (full-viletter-get *viqr-position* full-viletter)) 421 | 422 | ;;; Recognizing vietnamese text 423 | (defun ascii-vowel-p (char) 424 | (and (find (char-downcase char) "aeiouy") t)) 425 | 426 | (defun full-vichar-simple-p (full-vichar) 427 | (destructuring-bind ((char . num) . accent) full-vichar 428 | (declare (ignore char)) 429 | (and (null num) (null accent)))) 430 | 431 | 432 | ;;;; VIQR encoding 433 | 434 | ;; Currently not really VIQR, but rather just VIR, with extensions. 435 | ;; As the quote character is not quite supported... 436 | ;; Also, should be able to select \v-ish mode, 437 | ;; independently on I/O 438 | ;; We should be able to configure this in lots of ways: 439 | ;; alternate character sets, 440 | ;; what non-canonical combinations to accept, 441 | ;; disabling extensions, etc. 442 | 443 | ;; ..... 444 | 445 | (defparameter *viqr-quote* nil) ;; could be #\\ instead 446 | 447 | (defun init-viqr-tables () (values)) 448 | 449 | (defun get-viqr-char (peek next) 450 | (nest 451 | (if-let ((char (funcall peek)))) 452 | (if (find char "Dd") 453 | (progn 454 | (funcall next) 455 | (let ((d (funcall peek))) 456 | (if (find d "Dd+-") ; This is an extension to VIQR! 457 | (progn 458 | (funcall next) 459 | (cons (cons char 1) 0)) 460 | (cons (cons char 0) 0))))) 461 | (if (not (ascii-vowel-p char)) 462 | (progn 463 | (funcall next) 464 | (cons (cons char nil) nil))) 465 | (let* ((char-viletters (aref *vichar-to-viletter* (char-code char))) 466 | (ext (progn (funcall next) (funcall peek))) 467 | (num (or (position ext char-viletters 468 | :key (lambda (viletter) (char (aref *viqr-letters* viletter) 1)) 469 | :start 1 :test 'equal) 0)) 470 | (accent (progn (when (plusp num) (funcall next)) (funcall peek))) 471 | ;; what about alternate accents, such as / and \ for sach and huyen? 472 | (accent-num (if-let (num (position accent *viqr-accents* :test 'equal :key 'first-char)) 473 | (progn (funcall next) num) 474 | 0))) 475 | (cons (cons char num) accent-num)))) 476 | 477 | (defun viqr-from-full-vichar (full-vichar) 478 | (destructuring-bind (vichar . accent) full-vichar 479 | (destructuring-bind (char . num) vichar 480 | (if num 481 | (let* ((viletter (viletter-from-vichar vichar)) 482 | (string (strcat (aref *viqr-letters* viletter) 483 | (aref *viqr-accents* accent)))) 484 | (if (< 1 (length string)) 485 | (strcat *viqr-quote* string) 486 | string)) 487 | (string char))))) 488 | 489 | (defun put-viqr-char (put-char full-vichar) 490 | (when full-vichar 491 | (map () put-char (viqr-from-full-vichar full-vichar)))) 492 | 493 | 494 | ;;;; One-char encodings 495 | 496 | ;;; Generic routines for encodings where every letter+accent symbol 497 | ;;; represented by a single byte char. 498 | 499 | (defun make-inverse-vi-table (table-position make-entry) 500 | (loop :for viletter :from 0 501 | :for variants :across *vi-table* :do 502 | (loop :for accent :from 0 503 | :for variant :across variants 504 | :for code = (aref variant table-position) 505 | :when (<= 0 code) :do 506 | (funcall make-entry (cons viletter accent) code)))) 507 | 508 | (defun make-one-byte-table (table-position) 509 | (let ((to-vichar (make-array '(256) :initial-element nil))) 510 | (labels ((make-entry (full-viletter code) 511 | (setf (aref to-vichar code) (full-vichar-from-full-viletter full-viletter)))) 512 | (make-inverse-vi-table table-position #'make-entry) 513 | to-vichar))) 514 | 515 | (defun get-one-byte-char (to-vichar peek next) 516 | (let ((char (prog1 (funcall peek) (funcall next)))) 517 | (and char (aref to-vichar (char-code char))))) 518 | 519 | (defun convert-one-byte-char (table-position full-vichar) 520 | (when full-vichar 521 | (if (cdr full-vichar) 522 | (full-viletter-get table-position (full-viletter-from-full-vichar full-vichar)) 523 | (caar full-vichar)))) 524 | 525 | (defun put-one-byte-char (viletter-to put-char full-vichar) 526 | (when full-vichar 527 | (funcall put-char (code-char (convert-one-byte-char viletter-to full-vichar))))) 528 | 529 | 530 | ;;;; VISCII 1.1 ENCODING 531 | 532 | (defparameter *viscii11-to-full-vichar* nil) 533 | (defparameter *viscii11-position* 1) 534 | 535 | (defun init-viscii11-tables () 536 | (unless *viscii11-to-full-vichar* 537 | (setf *viscii11-to-full-vichar* (make-one-byte-table *viscii11-position*)))) 538 | 539 | (defun get-viscii11-char (peek next) 540 | (get-one-byte-char *viscii11-to-full-vichar* peek next)) 541 | 542 | (defun put-viscii11-char (put-char full-vichar) 543 | (put-one-byte-char *viscii11-position* put-char full-vichar)) 544 | 545 | (defun viscii11-from-full-vichar (full-vichar) 546 | (convert-one-byte-char *viscii11-position* full-vichar)) 547 | 548 | 549 | ;;;; VISCII 1.0 ENCODING 550 | 551 | (defparameter *viscii10-to-full-vichar* nil) 552 | 553 | (defun frob-viscii-code (code) 554 | ;; Exchange a. and O~, respectively D5 and A0 in viscii 1.1, and A0 and D5 in viscii 1.0 555 | (case code 556 | (#xD5 #xA0) 557 | (#xA0 #xD5) 558 | (otherwise code))) 559 | 560 | (defun init-viscii10-tables () 561 | (init-viscii11-tables)) 562 | 563 | (defun get-viscii10-char (peek next) 564 | (let ((char (prog1 (funcall peek) (funcall next)))) 565 | (and char (aref *viscii11-to-full-vichar* (frob-viscii-code (char-code char)))))) 566 | 567 | (defun put-viscii10-char (put-char full-vichar) 568 | (when full-vichar 569 | (let ((code (frob-viscii-code (convert-one-byte-char *viscii11-position* full-vichar)))) 570 | (funcall put-char (code-char code))))) 571 | 572 | 573 | ;;;; VIETWORD ENCODING 574 | 575 | (defparameter *vietword-to-full-vichar* nil) 576 | (defparameter *vietword-position* nil) 577 | 578 | (defun get-vietword-char (peek next) 579 | ;; Vietword 1.10 inserts sometimes chr(#xFF) before end-of-lines, 580 | ;; with no apparent reason. It also ends files with \0's and ^Z's. 581 | ;; We remove all that crap, as it has no meaning anyway... 582 | (loop :while (find (funcall peek) #.(map 'string 'code-char '(#xFF #x00 #x1A))) :do 583 | (funcall next)) 584 | (get-one-byte-char *vietword-to-full-vichar* peek next)) 585 | 586 | (defun put-vietword-char (put-char full-vichar) 587 | (put-one-byte-char *vietword-position* put-char full-vichar)) 588 | 589 | (defun init-vietword-tables () 590 | (unless *vietword-to-full-vichar* 591 | (setf *vietword-to-full-vichar* (make-one-byte-table *vietword-position*)))) 592 | 593 | 594 | ;;; UTF-8 encoding 595 | 596 | (defparameter *unicode-to-viscii11* (make-hash-table :test 'equal)) 597 | 598 | (defun init-unicode-tables () 599 | (loop :for i :below 256 600 | :for c :across *viscii11-to-unicode* :do 601 | (setf (gethash c *unicode-to-viscii11*) i)) 602 | (init-viscii11-tables)) 603 | 604 | (defun get-unicode-char (peek next) 605 | (flet ((get-char () (prog1 (funcall peek) (funcall next)))) 606 | (if-let (c (get-char)) 607 | (block nil 608 | (if-let (viscii11 (gethash (char-code c) *unicode-to-viscii11*)) 609 | (if-let (full-vichar (aref *viscii11-to-full-vichar* viscii11)) 610 | (return full-vichar))) 611 | (cons (cons c nil) nil))))) 612 | 613 | (defun unicode-from-full-vichar (full-vichar) 614 | (when full-vichar 615 | (if (cdr full-vichar) 616 | (code-char 617 | (aref *viscii11-to-unicode* 618 | (full-viletter-get *viscii11-position* 619 | (full-viletter-from-full-vichar full-vichar)))) 620 | (caar full-vichar)))) 621 | 622 | (defun put-unicode-char (put-char full-vichar) 623 | (when full-vichar 624 | (funcall put-char (unicode-from-full-vichar full-vichar)))) 625 | 626 | 627 | ;;;; Encoding from NDN's amateur MacIntosh fonts 628 | 629 | ;; Codes were guessed from binary dumps of word files. 630 | ;; There are errors, and more particular cases may exist... :( :( :( 631 | 632 | (defun to-char-code (x) 633 | (if (characterp x) (char-code x) x)) 634 | 635 | (defparameter *ndn-vowels-uc* 636 | (mapcar 'to-char-code '(#\A 174 227 #\E 211 #\I #\O 231 243 #\U 200 #\Y))) 637 | (defparameter *ndn-vowels-lc* 638 | (mapcar 'to-char-code '(#\a #\& 142 #\e #\" #\i #\o 141 #\! #\u 143 #\y))) 639 | (defparameter *ndn-letters* 640 | `(,@*ndn-vowels-uc* ,@*ndn-vowels-lc* ,@(mapcar 'to-char-code '(#\D #\d #\F #\f)))) 641 | ;; NB: Last two accent actually not known, repeated from the lower-case ones. 642 | ;; NDN accents are prefix 643 | ;; supplementary accents for in the lower-case case : mu+sach, mu+huyen on lowercase! 644 | (defparameter *ndn-accents-uc* 645 | `(nil ,@(mapcar 'to-char-code '(228 239 #\% #\] 208 0 0)))) 646 | (defparameter *ndn-accents-lc* 647 | `(nil ,@(mapcar 'to-char-code '(157 #\^ #\% 172 #\_ 161 #\))))) 648 | (defparameter *ndn-accents* 649 | (append *ndn-accents-uc* *ndn-accents-lc*)) 650 | (defparameter *n-ndn-accents* (length *ndn-accents-lc*)) 651 | 652 | (defparameter *viqr-to-ndn* 653 | '(("a`" . 137) ("e`" . 144) ("o`" . 153) ("u`" . 158) 654 | ("a~" . 138) ("e~" . 145) ("o~" . 145) ("u~" . 154) 655 | ("y~" . 159) ("I'" . 216) ("I`" . 232) ("I?" . 223) 656 | ("I~" . 234) ("I." . 235) ("i'" . 187) ("i`" . 233) 657 | ("i?" . 148) ("i~" . 236) ("i." . 149))) 658 | 659 | ;; Non-viet characters: 660 | (defparameter *char-to-ndn* 661 | `((,(code-char #o12) . #o15) 662 | (#\F . #.(char-code #\+)) 663 | (#\f . #.(char-code #\=)) 664 | (#\) . 136) 665 | (#\" . 210) 666 | (#\= . 173))) 667 | 668 | (defparameter *ndn-to-full-vichar* nil) 669 | (defparameter *viletter-to-ndn* nil) 670 | 671 | (defun init-ndn-tables () 672 | (setf *ndn-to-full-vichar* (make-hash-table :test 'equal)) 673 | (setf *viletter-to-ndn* (make-array (list *n-vi-letters*))) 674 | (loop :for l :below *n-vi-vowels* :do 675 | (setf (aref *viletter-to-ndn* l) (make-array *n-vi-accents*)) 676 | (loop :for a :below *n-vi-accents* :do 677 | (flet ((register (l a nl na) 678 | (setf (aref (aref *viletter-to-ndn* l) a) 679 | (map 'string 'code-char 680 | (list (aref *ndn-accents* na) (aref *ndn-letters* nl)))))) 681 | (register l a l a) 682 | (let ((ll (+ l *n-vi-vowels*)) 683 | (la (+ a *n-vi-accents*))) 684 | (if (and (< 0 a) (< a 3) (gethash ll *nomuletter*)) 685 | (register ll a (gethash ll *nomuletter*) (+ a 5)) 686 | (register ll a ll la)))))) 687 | 688 | (loop :for (viqr . ndn) :in *viqr-to-ndn* 689 | :for full-vichar = (full-vichar-from-viqr viqr) 690 | :for (vichar . accent) = full-vichar :do 691 | (setf (aref *ndn-to-full-vichar* ndn) full-vichar) 692 | (setf (aref (aref *viletter-to-ndn* (viletter-from-vichar vichar)) accent) ndn)) 693 | 694 | (loop :for (char . ndn) :in *char-to-ndn* :do 695 | (setf (aref *ndn-to-full-vichar* ndn) (cons (cons char nil) nil)))) 696 | 697 | (defun get-ndn-char (peek next) 698 | (flet ((get-char () (prog1 (funcall peek) (funcall next)))) 699 | (let* ((acc 0) 700 | (mu nil) 701 | (char (get-char))) 702 | (loop :for azz = (and char (position (char-code char) *ndn-accents* :test 'equal)) 703 | :while azz :do 704 | (setf acc (mod azz *n-ndn-accents*)) 705 | (setf char (get-char)) 706 | (when (<= *n-vi-accents* acc) 707 | (setf mu t) 708 | (decf acc 5))) 709 | (or (gethash (char-code char) *ndn-to-full-vichar*) 710 | (let ((viletter (position char *ndn-letters* :test 'equal))) 711 | (when viletter 712 | (when mu 713 | (setf viletter (gethash viletter *muletter*)))) 714 | (cons (vichar-from-viletter viletter) acc)))))) 715 | 716 | (defun put-ndn-char (put-char full-vichar) 717 | (when full-vichar 718 | (destructuring-bind (vichar . accent) full-vichar 719 | (destructuring-bind (char . num) vichar 720 | (declare (ignore num)) 721 | (if (full-vichar-simple-p full-vichar) 722 | (funcall put-char 723 | (if-let (ndn (gethash char *char-to-ndn*)) 724 | (code-char ndn) 725 | char)) 726 | (map () put-char 727 | (aref (aref *viletter-to-ndn* (viletter-from-vichar vichar)) accent))))))) 728 | 729 | 730 | ;;;; SORTABLE ENCODING 731 | 732 | (defun init-sort-tables () (values)) 733 | 734 | ;; Buffer for get-sort-char and pointer into said buffer 735 | (defparameter *gsc-buffer* (make-extensible-vector)) 736 | (defparameter *gsc-pointer* 0) 737 | 738 | (defun get-sort-char (peek next) 739 | (labels 740 | ((get-char () (prog1 (funcall peek) (funcall next))) 741 | (refill () 742 | (adjust-array *gsc-buffer* '(0)) 743 | (setf *gsc-pointer* 0) 744 | (let ((n-expected-accents 0)) 745 | (loop :for char = (funcall peek) 746 | :while (and char (not (ascii-letter-p char))) :do 747 | (funcall next) 748 | (vector-push-extend (cons (cons char nil) nil) *gsc-buffer*)) 749 | (loop :for char = (get-char) 750 | :for () = (unless char (return)) 751 | :for code = (char-code char) 752 | :for viletter-p = (plusp (length (aref *vichar-to-viletter* code))) 753 | :for num = (when viletter-p 754 | (funcall next) 755 | (digit-char-p (funcall peek))) :do 756 | (when (ascii-vowel-p char) (incf n-expected-accents)) 757 | (vector-push-extend (cons (cons char num) (when viletter-p 0)) *gsc-buffer*) 758 | (let ((char (funcall peek))) 759 | (unless (and char (ascii-letter-p char)) (return)))) 760 | (loop :with ptr = -1 761 | :repeat n-expected-accents :do 762 | (loop :until (ascii-vowel-p (caar (aref *gsc-buffer* (incf ptr))))) 763 | (setf (cdr (aref *gsc-buffer* ptr)) (digit-char-p (get-char))))))) 764 | (block nil 765 | (when (= *gsc-pointer* (length *gsc-buffer*)) 766 | (if (funcall peek) 767 | (refill) 768 | (return nil))) 769 | (aref *gsc-buffer* (post-incf *gsc-pointer*))))) 770 | 771 | ;; Buffers to put-sort-char 772 | (defparameter *psc-letter-buffer* (make-extensible-vector :element-type 'character)) 773 | (defparameter *psc-accent-buffer* (make-extensible-vector :element-type 'character)) 774 | 775 | (defun put-sort-char (put-char full-vichar) 776 | (if (or (null full-vichar) (not (ascii-letter-p (caar full-vichar)))) 777 | (progn 778 | (map () put-char *psc-letter-buffer*) 779 | (map () put-char *psc-accent-buffer*) 780 | (when full-vichar 781 | (funcall put-char (caar full-vichar))) 782 | (adjust-array *psc-letter-buffer* 0) 783 | (adjust-array *psc-accent-buffer* 0)) 784 | (destructuring-bind ((char . num) . accent) full-vichar 785 | (vector-push-extend char *psc-letter-buffer*) 786 | (when (plusp (length (aref *vichar-to-viletter* (char-code char)))) 787 | (vector-push-extend (digit-char num) *psc-letter-buffer*)) 788 | (when (ascii-vowel-p char) 789 | (vector-push-extend (digit-char accent) *psc-accent-buffer*))))) 790 | 791 | ;; TODO: vietlex.com claims that lower-case comes before upper-case, 792 | ;; and that multiple non-letters are squashed into a zero. 793 | (defun vi-sortable-string (string &optional (get-vichar 'get-unicode-char)) 794 | (let ((i 0) 795 | (l (length string)) 796 | (eos nil) 797 | (ss (make-string-output-stream)) 798 | (letters (make-string-output-stream)) 799 | (accents (make-string-output-stream)) 800 | (unread nil)) 801 | (labels 802 | ((peek () 803 | (when (< i l) (char string i))) 804 | (next () 805 | (incf i)) 806 | (next-vichar () 807 | (block nil 808 | (when unread 809 | (return (prog1 unread (setf unread nil)))) 810 | (when (<= l i) (setf eos t)) 811 | (unless eos 812 | (funcall get-vichar #'peek #'next)))) 813 | (unread (x) 814 | (setf unread x))) 815 | (loop :until eos :do 816 | (loop :for full-vichar = (next-vichar) 817 | :for base-char = (caar full-vichar) 818 | :until (or (null full-vichar) 819 | (and (ascii-letter-p base-char) (unread full-vichar))) :do 820 | (write-char base-char ss)) 821 | (loop :for full-vichar = (next-vichar) 822 | :while (and full-vichar 823 | (or (ascii-letter-p (caar full-vichar)) 824 | (progn (unread full-vichar) nil))) :do 825 | (destructuring-bind ((char . num) . accent) full-vichar 826 | (write-char char letters) 827 | (when num (write-char (digit-char num) letters)) 828 | (when accent (write-char (digit-char accent) accents)))) 829 | (write-string (get-output-stream-string letters) ss) 830 | (write-string (get-output-stream-string accents) ss)) 831 | (get-output-stream-string ss)))) 832 | 833 | (defun vnsort (get-vichar sequence) 834 | (let ((sortable (map 'vector (lambda (x) (cons x (vi-sortable-string get-vichar x))) sequence))) 835 | (sort sortable 'string< :key 'cdr) 836 | (map 'list 'car sortable))) 837 | 838 | 839 | ;;;; TRANSLATE STRING 840 | (defun transcode (full-vichar-getter full-vichar-putter 841 | &key (input *standard-input*) (output *standard-output*)) 842 | (with-input (input) 843 | (with-output (output) 844 | (labels 845 | ((peek () (peek-char nil input nil)) 846 | (next () (read-char input nil)) 847 | (put-char (char) (write-char char output))) 848 | (loop :for full-vichar = (funcall full-vichar-getter #'peek #'next) 849 | :for () = (funcall full-vichar-putter #'put-char full-vichar) 850 | :while full-vichar))))) ;; NB: the last call with nil allows for buffer flush 851 | 852 | (defun unicode-from-viqr (&key input output) 853 | (transcode #'get-viqr-char #'put-unicode-char :input input :output output)) 854 | 855 | (defun viqr-from-unicode (&key input output) 856 | (transcode #'get-unicode-char #'put-viqr-char :input input :output output)) 857 | 858 | #| 859 | ############################### OPTION PROCESSING ########################### 860 | 861 | my ($input_encoding, $ienc) ; 862 | my ($output_encoding, $oenc) ; 863 | 864 | my $get_vn_char ; 865 | my $put_vn_char ; 866 | 867 | my $do_it = \&usage ; 868 | 869 | #(I hate those things) 870 | sub Get_Options () { 871 | 872 | if ( $#ARGV < 0 ) { usage () ;} 873 | OPTION: 874 | while ($_=shift(@ARGV)) { 875 | if (/^(-i|--input)$/) { 876 | if ( ( $#ARGV >= 0 ) && 877 | ( $input_encoding = shift(@ARGV) , 878 | defined $vn_encodings{uc($input_encoding)} ) ) { 879 | $ienc = $vn_encodings{uc($input_encoding)} ; 880 | &{$init_tables[$ienc]}() ; 881 | $get_vn_char = $get_vn_char[$ienc] ; 882 | } else { 883 | errusage(); 884 | }; 885 | } elsif (/^(-o|--output)$/) { 886 | if ( ( $#ARGV >= 0 ) && 887 | ( $output_encoding = shift(@ARGV) , 888 | defined $vn_encodings{uc($output_encoding)} ) ) { 889 | $oenc = $vn_encodings{uc($output_encoding)} ; 890 | &{$init_tables[$oenc]}() ; 891 | $put_vn_char = $put_vn_char[$oenc] ; 892 | $do_it = \&translate ; 893 | } else { 894 | errusage(); 895 | }; 896 | } elsif (/^(-s|--sort)$/) { 897 | ($output_encoding,$oenc) = (\&vnsort, $vn_encodings{"VNSORT"}) ; 898 | $do_it = \&do_sort; 899 | } elsif (/^(-V|--version)$/) { 900 | version(); 901 | } elsif (/^(-[h\?]|--help)$/) { 902 | usage(); 903 | } elsif (/^(-k|--keymap)$/) { 904 | $do_it = \&do_linux_keymap; 905 | } elsif (/^(-C|--compose)$/) { 906 | $do_it = \&do_X_Compose_table; 907 | } elsif (/^(-p|--psf)$/) { 908 | $do_it = \&do_linux_psf_table; 909 | } elsif (/^(--lisp-table)$/) { 910 | $do_it = \&do_lisp_table; 911 | } elsif (/--/) { 912 | last OPTION; 913 | } else { 914 | unshift @ARGV, $_ ; 915 | last OPTION; 916 | } 917 | } 918 | # if ( $ienc == $oenc ) { 919 | # die("Output encoding cannot be the same as input encoding!\n"); 920 | # } 921 | } 922 | 923 | #################### FONT AND KEYMAP TABLES FOR LINUX CONSOLE ################ 924 | sub do_linux_psf_table { 925 | print <<"HEADER-END" ; 926 | # 927 | # This is a psf table for linux VISCII 1.1 console fonts. 928 | # Automatically generated by <> (c) 1996-1999 Уng-Vû Bân 929 | # Use psfaddtable(1) to configure a font 930 | # ÐÄ ÐÄO BÁC 931 | # You can freely distribute this software, 932 | # as long as you make it preserve this notice, 933 | # and make it clear what you possibly modified. 934 | # You can freely use this software, as long as you're not a communist. 935 | HEADER-END 936 | my $i ; 937 | for($i=0;$i<256;$i++) { 938 | printf "#x%02X U+%04X\n",$i,$viscii11_To_unicode[$i] ; 939 | } 940 | } 941 | 942 | sub lxquote { 943 | my $c = shift ; 944 | if ($c eq "'") { "\\'" } else { $c } 945 | } 946 | 947 | sub do_linux_keymap { 948 | my ($i) ; 949 | my @k1 = 950 | ( "A", "Z", "S", "E", "R", "I", "O", "L", "P", "U", "J", "Y", 951 | "a", "z", "s", "e", "r", "i", "o", "l", "p", "u", "j", "y", 952 | "D", "d", "D", "d" ); 953 | my @k2 = ( " ", "\'", "`", "?", "~", "." ); 954 | # my @k2 = ( " ", "b", "n", ",", ";", ":" ); 955 | # my @k2 = ( " ", "b", "n", "m", ",", "." ); 956 | print <<"HEADER-END" 957 | #!/usr/bin/loadkeys 958 | # This is a map of linux keyboard compose combinations for VISCII 1.1 input 959 | # Automatically generated by <> (c) 1996, 1997 Уng-Vû Bân 960 | # pipe it into loadkeys(1), or append it to your default keymap. 961 | # ÐÄ ÐÄO BÁC 962 | # You can freely distribute this software, 963 | # as long as you make it preserve this notice, 964 | # and make it clear what you possibly modified. 965 | # You can freely use this software, as long as you're not a communist. 966 | HEADER-END 967 | ; 968 | for ($i=0;$i<=$#vn_table;$i++) { 969 | my $viqr=$vn_table[$i][0] ; 970 | my $viscii=chr($vn_table[$i][1]) ; 971 | my ($let,$acc) = viqr2let($viqr) ; 972 | my $c1 = $k1[$let] ; 973 | my $k1 = lxquote($c1) ; 974 | my $k2 = $acc ? $k2[$acc] : $k1 ; 975 | print "compose '$k1' '$k2' to '$viscii'\n" ; 976 | if (length($viqr)==2 && !$acc) { 977 | # Linux can only compose two characters :( ]-: 978 | print "compose ", 979 | (map {"'".lxquote(chr($_))."' "} unpack("c*",$viqr)), 980 | "to '$viscii'\n" ; 981 | } 982 | } 983 | } 984 | 985 | 986 | sub do_lisp_table { 987 | my $i ; 988 | for($i=0;$i<=$#vn_table;$i++) { 989 | my $viqr = $vn_table[$i][0]; 990 | my $viscii11 = $vn_table[$i][1]; 991 | my $unicode = $viscii11_To_unicode[$viscii11] ; 992 | printf " (%-5s #x%04X)\n","\"$viqr\"",$unicode ; 993 | } 994 | } 995 | 996 | 997 | 998 | ########################### MISCELLANEOUS COMMANDS ########################### 999 | 1000 | sub version { 1001 | print("$PROGRAM $VERSION\n"); 1002 | exit(); 1003 | } 1004 | 1005 | my $usage="Usage: $0 [options] [input-files] 1006 | 1007 | Options summary: 1008 | short long parameter meaning 1009 | ========================================================== 1010 | -i --input specify input encoding 1011 | -o --output specify output encoding 1012 | -s --sort sort input 1013 | -k --keymap generate linux keymap hack 1014 | -C --compose generate X Compose keymap hack 1015 | -h --help print this help message 1016 | -V --version print version number 1017 | -- -- end of options 1018 | 1019 | Supported encodings: 1020 | s long name 1021 | =================================================================== 1022 | q viqr11 (viqr) VIQR 1.1 without quotes 1023 | v viscii11 (viscii) VISCII 1.1 (RFC1456) 1024 | w vietword (vw) VIETWORD 1.10 1025 | n tintuc (ndn) NDN's amateur Mac fonts from Tin Tu+'c 1026 | s vnsort (sort) VNSORT ready-to-sort format 1027 | 0 viscii10 (v10) VISCII 1.0 1028 | u unicode (uni) Unicode (ISO-10646/UCS-2 vietnamese subset) 1029 | "; 1030 | 1031 | ################################# SORT LINES ################################# 1032 | sub do_sort { 1033 | ### need an input encoding 1034 | if (!$input_encoding) { 1035 | die("cannot sort: input encoding not specified."); 1036 | } 1037 | 1038 | ### Slurp input 1039 | my @text = () ; 1040 | while (<>) { 1041 | push @text, $_; 1042 | } 1043 | 1044 | ### Sort it 1045 | my @sorted = vnsort ($get_vn_char,@text) ; 1046 | 1047 | ### Outputing it 1048 | print @sorted ; 1049 | 1050 | } 1051 | 1052 | 1053 | |# 1054 | 1055 | (init-vichar-tables) 1056 | (init-viqr-tables) 1057 | (init-viscii11-tables) 1058 | (init-unicode-tables) 1059 | --------------------------------------------------------------------------------