├── .gitattributes ├── .gitignore ├── src ├── port.lisp ├── uri │ ├── file.lisp │ ├── ftp.lisp │ ├── http.lisp │ └── ldap.lisp ├── error.lisp ├── uri.lisp ├── etld.lisp ├── encode.lisp ├── util.lisp ├── decode.lisp ├── domain.lisp ├── quri.lisp └── parser.lisp ├── t ├── benchmark.lisp ├── parser.lisp ├── encode.lisp ├── decode.lisp ├── etld.lisp ├── domain.lisp └── quri.lisp ├── .travis.yml ├── .github └── workflows │ └── ci.yml ├── quri-test.asd ├── quri.asd └── README.markdown /.gitattributes: -------------------------------------------------------------------------------- 1 | data/* linguist-vendored=false 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.fasl 2 | *.dx32fsl 3 | *.dx64fsl 4 | *.lx32fsl 5 | *.lx64fsl 6 | *.x86f 7 | *~ 8 | .#* -------------------------------------------------------------------------------- /src/port.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.port 3 | (:use :cl) 4 | (:import-from :alexandria 5 | :plist-hash-table) 6 | (:export :scheme-default-port)) 7 | (in-package :quri.port) 8 | 9 | (defvar +default-ports+ 10 | (plist-hash-table 11 | '("ftp" 21 12 | "ssh" 22 13 | "telnet" 23 14 | "http" 80 15 | "ldap" 389 16 | "https" 443 17 | "ldaps" 636 18 | "ws" 80 19 | "wss" 443) 20 | :test 'equal)) 21 | 22 | (defun scheme-default-port (scheme) 23 | (gethash scheme +default-ports+)) 24 | -------------------------------------------------------------------------------- /t/benchmark.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri-test.benchmark 3 | (:use :cl 4 | :quri) 5 | (:export :run-benchmark)) 6 | (in-package :quri-test.benchmark) 7 | 8 | (defun run-benchmark () 9 | (format t "~2&# QURI:URI~2%") 10 | (time 11 | (dotimes (i 100000) 12 | (quri:uri "http://www.ics.uci.edu/pub/ietf/uri/#Related"))) 13 | (format t "~2&# QURI:URL-DECODE~2%") 14 | (time 15 | (dotimes (i 100000) 16 | (quri:url-decode "/foo%E3%81%82"))) 17 | (format t "~2&# QURI:URL-ENCODE~2%") 18 | (time 19 | (dotimes (i 100000) 20 | (quri:url-encode "/fooあ")))) 21 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: common-lisp 2 | sudo: required 3 | 4 | env: 5 | global: 6 | - PATH=~/.roswell/bin:$PATH 7 | - ROSWELL_BRANCH=release 8 | - ROSWELL_INSTALL_DIR=$HOME/.roswell 9 | - COVERAGE_EXCLUDE=t 10 | matrix: 11 | - LISP=sbcl-bin COVERALLS=true 12 | - LISP=ccl-bin 13 | - LISP=abcl 14 | - LISP=clisp 15 | - LISP=ecl 16 | - LISP=allegro 17 | - LISP=cmucl 18 | 19 | matrix: 20 | allow_failures: 21 | - env: LISP=clisp 22 | - env: LISP=cmucl 23 | 24 | install: 25 | - curl -L https://raw.githubusercontent.com/roswell/roswell/$ROSWELL_BRANCH/scripts/install-for-ci.sh | sh 26 | - ros install prove 27 | 28 | script: 29 | - ros -s quri-test 30 | - run-prove quri-test.asd 31 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test: 7 | name: ${{ matrix.lisp }} on ${{ matrix.os }} 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | matrix: 11 | lisp: [sbcl-bin] 12 | os: [ubuntu-latest] 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Install Roswell 17 | env: 18 | LISP: ${{ matrix.lisp }} 19 | run: curl -L https://raw.githubusercontent.com/roswell/roswell/master/scripts/install-for-ci.sh | sh -x 20 | - name: Install Prove 21 | run: ros install prove cl-utilities 22 | - name: Run tests 23 | run: | 24 | PATH="~/.roswell/bin:$PATH" 25 | ros -s quri-test 26 | run-prove quri-test.asd 27 | -------------------------------------------------------------------------------- /t/parser.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri-test.parser 3 | (:use :cl 4 | :quri.parser 5 | :prove)) 6 | (in-package :quri-test.parser) 7 | 8 | (plan nil) 9 | 10 | (subtest "parser string bounds" 11 | (is (nth-value 0 (parse-uri "foo://bar")) "foo") 12 | (is (nth-value 0 (parse-uri "foo://bar" :start 4)) nil) 13 | (is (nth-value 4 (parse-uri "foo://bar/xyz?a=b#c")) "/xyz") 14 | (is (nth-value 4 (parse-uri "foo://bar/xyz?a=b#c" :end 12)) "/xy") 15 | (is (nth-value 5 (parse-uri "foo://bar/xyz?a=b#c")) "a=b") 16 | (is (nth-value 5 (parse-uri "foo://bar/xyz?a=b#c" :end 13)) nil) 17 | (is (nth-value 6 (parse-uri "foo://bar/xyz?a=b#c")) "c") 18 | (is (nth-value 6 (parse-uri "foo://bar/xyz?a=b#c" :end 17)) nil)) 19 | 20 | (finalize) 21 | -------------------------------------------------------------------------------- /quri-test.asd: -------------------------------------------------------------------------------- 1 | #| 2 | This file is a part of quri project. 3 | Copyright (c) 2014 Eitaro Fukamachi (e.arrows@gmail.com) 4 | |# 5 | 6 | (defsystem quri-test 7 | :author "Eitaro Fukamachi" 8 | :license "BSD 3-Clause" 9 | :depends-on (:quri 10 | :prove) 11 | :components ((:module "t" 12 | :components 13 | ((:test-file "quri") 14 | (:test-file "parser") 15 | (:test-file "decode") 16 | (:test-file "encode") 17 | (:test-file "domain") 18 | (:test-file "etld") 19 | (:file "benchmark")))) 20 | 21 | :defsystem-depends-on (:prove-asdf) 22 | :perform (test-op :after (op c) 23 | (funcall (intern #.(string :run-test-system) :prove-asdf) c) 24 | (asdf:clear-system c))) 25 | -------------------------------------------------------------------------------- /src/uri/file.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.uri.file 3 | (:use :cl) 4 | (:import-from :quri.uri 5 | :uri 6 | :scheme 7 | :port 8 | :uri-path) 9 | (:export :uri-file 10 | :uri-file-p 11 | :make-uri-file 12 | :uri-file-pathname)) 13 | (in-package :quri.uri.file) 14 | 15 | (defstruct (uri-file (:include uri (scheme "file") (port nil)) 16 | (:constructor %make-uri-file))) 17 | 18 | (defun make-uri-file (&rest initargs &key path &allow-other-keys) 19 | (when (pathnamep path) 20 | (setf (getf initargs :path) 21 | (uiop:native-namestring path))) 22 | (apply #'%make-uri-file initargs)) 23 | 24 | (declaim (ftype (function (uri-file) pathname) uri-file-pathname)) 25 | (defun uri-file-pathname (file) 26 | "Get a lisp pathname object from a file URI. 27 | Assumes that the path of the file URI is correct path syntax for the environment." 28 | (parse-namestring (uri-path file))) 29 | -------------------------------------------------------------------------------- /src/error.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.error 3 | (:use :cl) 4 | (:export :uri-error 5 | 6 | :uri-malformed-string 7 | :uri-invalid-port 8 | 9 | :url-decoding-error 10 | 11 | :uri-malformed-urlencoded-string)) 12 | (in-package :quri.error) 13 | 14 | (define-condition uri-error (error) ()) 15 | 16 | (define-condition uri-malformed-string (uri-error) 17 | ((data :initarg :data) 18 | (position :initarg :position)) 19 | (:report (lambda (condition stream) 20 | (with-slots (data position) condition 21 | (format stream "URI ~S contains an illegal character ~S at position ~S." 22 | data (aref data position) position))))) 23 | (define-condition uri-invalid-port (uri-malformed-string) 24 | () 25 | (:report (lambda (condition stream) 26 | (with-slots (data position) condition 27 | (format stream "URI ~S contains an illegal character ~S at position ~S." 28 | data (aref data position) position))))) 29 | 30 | (define-condition url-decoding-error (uri-error) ()) 31 | 32 | (define-condition uri-malformed-urlencoded-string (uri-error) ()) 33 | -------------------------------------------------------------------------------- /t/encode.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri-test.encode 3 | (:use :cl 4 | :quri.encode 5 | :prove)) 6 | (in-package :quri-test.encode) 7 | 8 | (plan 2) 9 | 10 | (subtest "url-encode" 11 | (is (url-encode "Tiffany") "Tiffany") 12 | (is (url-encode "Tiffany & Co.") "Tiffany%20%26%20Co.") 13 | (is (url-encode "Tiffany & Co." :space-to-plus t) 14 | "Tiffany+%26+Co.") 15 | (is (url-encode "{\"field\": \"test\", \"data\": 0, \"products\": {\"name\": \"apples\"}, \"status\": true}") 16 | "%7B%22field%22%3A%20%22test%22%2C%20%22data%22%3A%200%2C%20%22products%22%3A%20%7B%22name%22%3A%20%22apples%22%7D%2C%20%22status%22%3A%20true%7D")) 17 | 18 | (subtest "url-encode-params" 19 | (is (url-encode-params '(("a" . "b") ("c" . "d"))) 20 | "a=b&c=d") 21 | (is (url-encode-params 22 | `(("a" . ,(make-array 1 :element-type '(unsigned-byte 8) 23 | :initial-contents (list (char-code #\b)))))) 24 | "a=b") 25 | (is (url-encode-params '(("a" . "b") ("c" . 1))) 26 | "a=b&c=1") 27 | (is (let ((*print-base* 2)) 28 | (url-encode-params '(("a" . 5)))) 29 | "a=5") 30 | (is (url-encode-params '(("alpha" . "абв"))) 31 | "alpha=%D0%B0%D0%B1%D0%B2") 32 | (is (url-encode-params '(("alpha" . "абв")) :percent-encode nil) 33 | "alpha=абв")) 34 | 35 | (finalize) 36 | -------------------------------------------------------------------------------- /quri.asd: -------------------------------------------------------------------------------- 1 | (defsystem "quri" 2 | :version "0.7.0" 3 | :author "Eitaro Fukamachi" 4 | :maintainer "André A. Gomes" 5 | :license "BSD 3-Clause" 6 | :depends-on ("babel" 7 | "alexandria" 8 | "split-sequence" 9 | "cl-utilities" 10 | "idna" 11 | #+sbcl "sb-cltl2") 12 | :components ((:module "src" 13 | :components 14 | ((:file "quri" :depends-on ("uri" "uri-classes" "domain" "parser" "decode" "encode" "error")) 15 | (:file "uri" :depends-on ("port")) 16 | (:module "uri-classes" 17 | :pathname "uri" 18 | :depends-on ("uri" "port" "encode" "decode") 19 | :components 20 | ((:file "ftp") 21 | (:file "http") 22 | (:file "ldap") 23 | (:file "file"))) 24 | (:file "domain" :depends-on ("uri" "etld")) 25 | (:file "etld") 26 | (:file "parser" :depends-on ("error" "util")) 27 | (:file "decode" :depends-on ("error" "util")) 28 | (:file "encode" :depends-on ("error" "util")) 29 | (:file "port") 30 | (:file "util") 31 | (:file "error")))) 32 | :description "Yet another URI library for Common Lisp" 33 | :in-order-to ((test-op (test-op "quri-test")))) 34 | -------------------------------------------------------------------------------- /src/uri/ftp.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.uri.ftp 3 | (:use :cl) 4 | (:import-from :quri.uri 5 | :uri 6 | :scheme 7 | :port 8 | :uri-path) 9 | (:import-from :quri.port 10 | :scheme-default-port) 11 | (:export :uri-ftp 12 | :uri-ftp-p 13 | :uri-ftp-typecode 14 | :make-uri-ftp)) 15 | (in-package :quri.uri.ftp) 16 | 17 | (defstruct (uri-ftp (:include uri (scheme "ftp") (port #.(scheme-default-port "ftp"))) 18 | (:constructor %make-uri-ftp)) 19 | typecode) 20 | 21 | (defun make-uri-ftp (&rest initargs) 22 | (let ((ftp (apply #'%make-uri-ftp initargs))) 23 | (multiple-value-bind (path typecode) 24 | (parse-ftp-typecode (uri-path ftp)) 25 | (when path 26 | (setf (uri-path ftp) path 27 | (uri-ftp-typecode ftp) typecode))) 28 | ftp)) 29 | 30 | (defun parse-ftp-typecode (path) 31 | (let ((len (length path))) 32 | (when (and (< #.(length ";type=") len) 33 | (string= path ";type=" 34 | :start1 (- len 1 #.(length ";type=")) 35 | :end1 (1- len))) 36 | (let ((typecode (aref path (1- len)))) 37 | (when (or (char= typecode #\a) 38 | (char= typecode #\i) 39 | (char= typecode #\d)) 40 | (values (subseq path 0 (- len #.(1+ (length ";type=")))) 41 | typecode)))))) 42 | -------------------------------------------------------------------------------- /src/uri/http.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.uri.http 3 | (:use :cl) 4 | (:import-from :quri.uri 5 | :uri 6 | :scheme 7 | :port 8 | :uri-query) 9 | (:import-from :quri.port 10 | :scheme-default-port) 11 | (:import-from :quri.encode 12 | :url-encode-params) 13 | (:import-from :quri.decode 14 | :url-decode-params) 15 | (:import-from :alexandria 16 | :when-let) 17 | (:export :uri-http 18 | :make-uri-http 19 | :uri-http-p 20 | 21 | :uri-https 22 | :make-uri-https 23 | :uri-https-p 24 | 25 | :uri-query-params)) 26 | (in-package :quri.uri.http) 27 | 28 | (defstruct (uri-http (:include uri (scheme "http") (port #.(scheme-default-port "http"))))) 29 | 30 | (defstruct (uri-https (:include uri-http (scheme "https") (port #.(scheme-default-port "https"))))) 31 | 32 | (defun uri-query-params (http &key (lenient t) (percent-decode t)) 33 | (when-let (query (uri-query http)) 34 | (url-decode-params query 35 | :lenient lenient 36 | :percent-decode percent-decode))) 37 | 38 | (defun (setf uri-query-params) (new http &key lenient (percent-encode t)) 39 | (declare (ignore lenient)) 40 | (setf (uri-query http) 41 | (if new 42 | (url-encode-params 43 | new :percent-encode percent-encode) 44 | nil))) 45 | -------------------------------------------------------------------------------- /t/decode.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri-test.decode 3 | (:use :cl 4 | :quri.decode 5 | :prove)) 6 | (in-package :quri-test.decode) 7 | 8 | (plan 8) 9 | 10 | (is (url-decode-params "a=b&c=d") 11 | '(("a" . "b") ("c" . "d")) 12 | "normal case") 13 | 14 | (is (url-decode-params "a=b&c=d&e") 15 | '(("a" . "b") ("c" . "d") ("e")) 16 | "field only") 17 | 18 | (is (url-decode-params "alpha=%D0%B0%D0%B1%D0%B2") 19 | '(("alpha" . "абв")) 20 | "percent decode") 21 | 22 | (is (url-decode-params "alpha=%D0%B0%D0%B1%D0%B2" :percent-decode nil) 23 | '(("alpha" . "%D0%B0%D0%B1%D0%B2")) 24 | "no percent decode") 25 | 26 | (is-error (url-decode-params "a=b=c") 27 | 'quri:uri-malformed-urlencoded-string 28 | "Raise a malformed error") 29 | 30 | (is (url-decode-params "a=b=c" :lenient t) 31 | '(("a" . "b=c")) 32 | ":lenient t") 33 | 34 | (is-error (url-decode-params "a=%!@#&b=1") 35 | 'quri:url-decoding-error 36 | "Raise a decoding error") 37 | 38 | (is (url-decode-params "a=%!@#&b=1" :lenient t) 39 | '(("a" . "%!@#") ("b" . "1"))) 40 | 41 | (defvar *replacement-character* 42 | #+abcl 43 | (babel:octets-to-string (coerce #(239 191 189) '(array (unsigned-byte 8) (3)))) 44 | #-abcl 45 | (princ-to-string #\replacement_character)) 46 | 47 | (is (url-decode "%bf" :lenient t) 48 | *replacement-character*) 49 | 50 | (is (url-decode-params "%bf" :lenient t) 51 | `((,*replacement-character*))) 52 | 53 | (finalize) 54 | -------------------------------------------------------------------------------- /src/uri.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.uri 3 | (:use :cl) 4 | (:import-from :quri.port 5 | :scheme-default-port) 6 | (:export :uri 7 | :make-basic-uri 8 | :uri-p 9 | :uri-scheme 10 | :uri-userinfo 11 | :uri-host 12 | :uri-port 13 | :uri-path 14 | :uri-query 15 | :uri-fragment 16 | :uri-authority 17 | 18 | :urn 19 | :make-urn 20 | :urn-p 21 | :urn-nid 22 | :urn-nss)) 23 | (in-package :quri.uri) 24 | 25 | (defstruct (uri (:constructor %make-uri)) 26 | (scheme nil :read-only t) 27 | userinfo 28 | host 29 | port 30 | path 31 | query 32 | fragment) 33 | 34 | (defmethod make-load-form ((object uri) &optional environment) 35 | (make-load-form-saving-slots object :environment environment)) 36 | 37 | (defun make-basic-uri (&rest args &key scheme userinfo host port path query fragment) 38 | (declare (ignore scheme userinfo host port path query fragment)) 39 | (let ((uri (apply #'%make-uri args))) 40 | (unless (uri-port uri) 41 | (setf (uri-port uri) (scheme-default-port (uri-scheme uri)))) 42 | (when (pathnamep (uri-path uri)) 43 | (setf (uri-path uri) 44 | (uiop:native-namestring (uri-path uri)))) 45 | uri)) 46 | 47 | (defun uri-authority (uri) 48 | (when (uri-host uri) 49 | (let ((default-port (scheme-default-port (uri-scheme uri)))) 50 | (with-standard-io-syntax 51 | (format nil "~:[~;~:*~A@~]~A~:[:~A~;~*~]" 52 | (uri-userinfo uri) 53 | (uri-host uri) 54 | (eql (uri-port uri) default-port) 55 | (uri-port uri)))))) 56 | 57 | (defstruct (urn (:include uri (scheme :urn)) 58 | (:constructor %make-urn)) 59 | nid 60 | nss) 61 | 62 | (defun make-urn (&rest initargs) 63 | (let ((urn (apply #'%make-urn initargs))) 64 | (when (uri-path urn) 65 | (let ((colon-pos (position #\: (uri-path urn)))) 66 | (if colon-pos 67 | (setf (urn-nid urn) (subseq (uri-path urn) 0 colon-pos) 68 | (urn-nss urn) (subseq (uri-path urn) (1+ colon-pos))) 69 | (setf (urn-nid urn) (uri-path urn))))) 70 | urn)) 71 | -------------------------------------------------------------------------------- /src/uri/ldap.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.uri.ldap 3 | (:use :cl) 4 | (:import-from :quri.uri 5 | :uri 6 | :scheme 7 | :port 8 | :uri-path 9 | :uri-query) 10 | (:import-from :quri.port 11 | :scheme-default-port) 12 | (:import-from :split-sequence 13 | :split-sequence) 14 | (:import-from :alexandria 15 | :when-let) 16 | (:export :uri-ldap 17 | :make-uri-ldap 18 | :uri-ldap-p 19 | 20 | :uri-ldaps 21 | :make-uri-ldaps 22 | :uri-ldaps-p 23 | 24 | :uri-ldap-dn 25 | :uri-ldap-attributes 26 | :uri-ldap-scope 27 | :uri-ldap-filter 28 | :uri-ldap-extensions)) 29 | (in-package :quri.uri.ldap) 30 | 31 | (defstruct (uri-ldap (:include uri (scheme "ldap") (port #.(scheme-default-port "ldap"))))) 32 | 33 | (defstruct (uri-ldaps (:include uri-ldap (scheme "ldaps") (port #.(scheme-default-port "ldaps"))))) 34 | 35 | (defun uri-ldap-dn (ldap) 36 | (let ((path (uri-path ldap))) 37 | (when (and path 38 | (/= 0 (length path))) 39 | (if (char= (aref path 0) #\/) 40 | (subseq path 1) 41 | path)))) 42 | 43 | (defun (setf uri-ldap-dn) (new ldap) 44 | (setf (uri-path ldap) 45 | (concatenate 'string "/" new)) 46 | new) 47 | 48 | (defun nth-uri-ldap-lists (ldap n) 49 | (check-type ldap uri-ldap) 50 | (when-let (query (uri-query ldap)) 51 | (car (last (split-sequence #\? query :count n))))) 52 | 53 | (defun (setf nth-uri-ldap-lists) (new ldap n) 54 | (check-type ldap uri-ldap) 55 | (check-type new string) 56 | (let ((query (uri-query ldap))) 57 | (setf (uri-query ldap) 58 | (if query 59 | (let ((parts (split-sequence #\? query))) 60 | (with-output-to-string (s) 61 | (dotimes (i n) 62 | (princ (or (pop parts) "") s) 63 | (write-char #\? s)) 64 | (princ new s) 65 | (pop parts) ;; ignore 66 | (dolist (part parts) 67 | (write-char #\? s) 68 | (princ part s)))) 69 | new)))) 70 | 71 | (defun uri-ldap-attributes (ldap) 72 | (nth-uri-ldap-lists ldap 1)) 73 | (defun (setf uri-ldap-attributes) (new ldap) 74 | (setf (nth-uri-ldap-lists ldap 0) new)) 75 | 76 | (defun uri-ldap-scope (ldap) 77 | (nth-uri-ldap-lists ldap 2)) 78 | (defun (setf uri-ldap-scope) (new ldap) 79 | (setf (nth-uri-ldap-lists ldap 1) new)) 80 | 81 | (defun uri-ldap-filter (ldap) 82 | (nth-uri-ldap-lists ldap 3)) 83 | (defun (setf uri-ldap-filter) (new ldap) 84 | (setf (nth-uri-ldap-lists ldap 2) new)) 85 | 86 | (defun uri-ldap-extensions (ldap) 87 | (nth-uri-ldap-lists ldap 4)) 88 | (defun (setf uri-ldap-extensions) (new ldap) 89 | (setf (nth-uri-ldap-lists ldap 3) new)) 90 | -------------------------------------------------------------------------------- /t/etld.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri-test.etld 3 | (:use :cl 4 | :quri.etld 5 | :prove)) 6 | (in-package :quri-test.etld) 7 | 8 | (plan nil) 9 | 10 | (subtest "parse-domain" 11 | (is (parse-domain "com") nil) 12 | (is (parse-domain "example.com") "example.com") 13 | (is (parse-domain "www.example.com") "example.com") 14 | (is (parse-domain "uk.com") nil) 15 | (is (parse-domain "example.uk.com") "example.uk.com") 16 | (is (parse-domain "b.example.uk.com") "example.uk.com") 17 | (is (parse-domain "a.b.example.uk.com") "example.uk.com") 18 | (is (parse-domain "test.ac") "test.ac") 19 | 20 | ;; TLD with only 1 (wildcard) rule 21 | (is (parse-domain "cy") nil) 22 | (is (parse-domain "c.cy") nil) 23 | (is (parse-domain "b.c.cy") "b.c.cy") 24 | (is (parse-domain "a.b.c.cy") "b.c.cy") 25 | 26 | ;; jp domain 27 | (is (parse-domain "jp") nil) 28 | (is (parse-domain "test.jp") "test.jp") 29 | (is (parse-domain "www.test.jp") "test.jp") 30 | (is (parse-domain "ac.jp") nil) 31 | (is (parse-domain "test.ac.jp") "test.ac.jp") 32 | (is (parse-domain "kyoto.jp") nil) 33 | (is (parse-domain "test.kyoto.jp") "test.kyoto.jp") 34 | (is (parse-domain "ide.kyoto.jp") nil) 35 | (is (parse-domain "b.ide.kyoto.jp") "b.ide.kyoto.jp") 36 | (is (parse-domain "a.b.ide.kyoto.jp") "b.ide.kyoto.jp") 37 | (is (parse-domain "c.kobe.jp") nil) 38 | (is (parse-domain "b.c.kobe.jp") "b.c.kobe.jp") 39 | (is (parse-domain "a.b.c.kobe.jp") "b.c.kobe.jp") 40 | (is (parse-domain "city.kobe.jp") "city.kobe.jp") 41 | (is (parse-domain "www.city.kobe.jp") "city.kobe.jp") 42 | 43 | ;; TLD with a wildcard rule and exceptions 44 | (is (parse-domain "ck") nil) 45 | (is (parse-domain "test.ck") nil) 46 | (is (parse-domain "b.test.ck") "b.test.ck") 47 | (is (parse-domain "a.b.test.ck") "b.test.ck") 48 | (is (parse-domain "www.ck") "www.ck") 49 | (is (parse-domain "www.www.ck") "www.ck") 50 | 51 | ;; US K12 52 | (is (parse-domain "us") nil) 53 | (is (parse-domain "test.us") "test.us") 54 | (is (parse-domain "www.test.us") "test.us") 55 | (is (parse-domain "ak.us") nil) 56 | (is (parse-domain "test.ak.us") "test.ak.us") 57 | (is (parse-domain "www.test.ak.us") "test.ak.us") 58 | (is (parse-domain "k12.ak.us") nil) 59 | (is (parse-domain "test.k12.ak.us") "test.k12.ak.us") 60 | (is (parse-domain "www.test.k12.ak.us") "test.k12.ak.us") 61 | 62 | ;; IDN labels. 63 | (is (parse-domain "公司.cn") nil) 64 | 65 | ;; Unlisted TLD 66 | (is (parse-domain "example") "example") 67 | (is (parse-domain "example.example") "example.example") 68 | (is (parse-domain "b.example.example") "example.example") 69 | (is (parse-domain "a.b.example.example") "example.example") 70 | 71 | ;; Listed TLD, but non-Internet TLD 72 | (is (parse-domain "local") "local") 73 | (is (parse-domain "example.local") "example.local") 74 | (is (parse-domain "b.example.local") "example.local") 75 | (is (parse-domain "a.b.example.local") "example.local")) 76 | 77 | (finalize) 78 | -------------------------------------------------------------------------------- /src/etld.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.etld 3 | (:use :cl) 4 | (:import-from :alexandria 5 | :starts-with-subseq 6 | :ends-with-subseq) 7 | (:export :parse-domain)) 8 | (in-package :quri.etld) 9 | 10 | (eval-when (:compile-toplevel :load-toplevel :execute) 11 | (defvar *default-etld-names* 12 | #.(asdf:system-relative-pathname :quri #P"data/effective_tld_names.dat")) 13 | 14 | (defun load-etld-data (&optional (etld-names-file *default-etld-names*)) 15 | (with-open-file (in etld-names-file 16 | :element-type #+lispworks :default #-lispworks 'character 17 | :external-format #+clisp charset:utf-8 #-clisp :utf-8) 18 | (loop with special-tlds = nil 19 | with normal-tlds = (make-hash-table :test 'equal) 20 | with wildcard-tlds = (make-hash-table :test 'equal) 21 | for line = (read-line in nil nil) 22 | while line 23 | unless (or (= 0 (length line)) 24 | (starts-with-subseq "//" line)) 25 | do (cond 26 | ((starts-with-subseq "*" line) 27 | (setf (gethash (subseq line 2) wildcard-tlds) t)) 28 | ((starts-with-subseq "!" line) 29 | (push (subseq line 1) special-tlds)) 30 | (t 31 | (setf (gethash line normal-tlds) t))) 32 | finally (return (list normal-tlds wildcard-tlds special-tlds)))))) 33 | 34 | (defvar *etlds* 35 | #+(or abcl (and ecl win32 msvc)) (load-etld-data) 36 | #-(or abcl (and ecl win32 msvc)) '#.(load-etld-data)) 37 | 38 | (defun next-subdomain (hostname &optional (start 0)) 39 | (let ((pos (position #\. hostname :start start))) 40 | (when pos 41 | (incf pos) 42 | (values (subseq hostname pos) 43 | pos)))) 44 | 45 | (defun make-subdomain-iter (hostname) 46 | (let ((current-pos 0) 47 | (first t)) 48 | (lambda () 49 | (block nil 50 | (when first 51 | (setq first nil) 52 | (return hostname)) 53 | (multiple-value-bind (subdomain pos) 54 | (next-subdomain hostname current-pos) 55 | (when subdomain 56 | (setf current-pos pos) 57 | subdomain)))))) 58 | 59 | (defun parse-domain (hostname) 60 | (dolist (tld (third *etlds*)) 61 | (when (ends-with-subseq tld hostname) 62 | (if (= (length tld) (length hostname)) 63 | (return-from parse-domain hostname) 64 | (when (char= (aref hostname (- (length hostname) (length tld) 1)) 65 | #\.) 66 | (return-from parse-domain 67 | (subseq hostname 68 | (- (length hostname) (length tld)))))))) 69 | 70 | (loop with iter = (make-subdomain-iter hostname) 71 | with pre-prev-subdomain = nil 72 | with prev-subdomain = nil 73 | for subdomain = (funcall iter) 74 | while subdomain 75 | if (gethash subdomain (second *etlds*)) do 76 | (return pre-prev-subdomain) 77 | else if (gethash subdomain (first *etlds*)) do 78 | (return (if (string= subdomain hostname) 79 | nil 80 | prev-subdomain)) 81 | do (setf pre-prev-subdomain prev-subdomain 82 | prev-subdomain subdomain) 83 | finally 84 | (let* ((pos (position #\. hostname :from-end t)) 85 | (pos (and pos 86 | (position #\. hostname :from-end t :end pos)))) 87 | (return 88 | (if pos 89 | (subseq hostname (1+ pos)) 90 | hostname))))) 91 | -------------------------------------------------------------------------------- /t/domain.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri-test.domain 3 | (:use :cl 4 | :quri.domain 5 | :prove)) 6 | (in-package :quri-test.domain) 7 | 8 | (plan nil) 9 | 10 | (subtest "ipv4-addr-p" 11 | (ok (ipv4-addr-p "127.0.0.1") 12 | "127.0.0.1 is valid") 13 | (ok (ipv4-addr-p "255.255.255.255") 14 | "255.255.255.255 is valid") 15 | (ok (not (ipv4-addr-p "256.255.255.255")) 16 | "256.255.255.255 is not valid") 17 | (ok (not (ipv4-addr-p "345.23.1.0")) 18 | "345.23.1.0 is not valid") 19 | (ok (not (ipv4-addr-p "127.0")) 20 | "127.0 is not valid") 21 | (ok (not (ipv4-addr-p "127.0.0.0.1")) 22 | "127.0.0.0.1 is not valid") 23 | (ok (not (ipv4-addr-p "2ch.net")) 24 | "2ch.net is not valid") 25 | (ok (not (ipv4-addr-p "127..0.1")) 26 | "127..0.1 is not valid") 27 | (ok (not (ipv4-addr-p "...")) 28 | "... is not valid")) 29 | 30 | (subtest "ipv6-addr-p" 31 | (ok (ipv6-addr-p "2001:0db8:bd05:01d2:288a:1fc0:0001:10ee")) 32 | (ok (ipv6-addr-p "2001:db8:20:3:1000:100:20:3")) 33 | (ok (ipv6-addr-p "2001:db8::1234:0:0:9abc")) 34 | (ok (ipv6-addr-p "2001:db8::9abc")) 35 | (ok (ipv6-addr-p "::1")) 36 | (ok (ipv6-addr-p "::")) 37 | (ok (ipv6-addr-p "1::")) 38 | (ok (not (ipv6-addr-p "1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1")))) 39 | 40 | (subtest "ip-addr=" 41 | (is (ip-addr= "127.0.0.1" "127.0.0.1") t) 42 | (is (ip-addr= "127.0.0.1" "127.0.0.2") nil) 43 | (is (ip-addr= "127.0.0.1" "localhost") nil) 44 | (is (ip-addr= "::1" "0:0:0:0:0:0:0:1") t) 45 | (is (ip-addr= "[::1]" "0:0:0:0:0:0:0:1") t) 46 | (is (ip-addr= "[::1]" "0:0:0:0:0:0:0:2") nil)) 47 | 48 | (subtest "cookie-domain-p" 49 | (is (cookie-domain-p "com" "com") nil) 50 | (is (cookie-domain-p "com" "example.com") nil) 51 | (is (cookie-domain-p "com" "foo.example.com") nil) 52 | (is (cookie-domain-p "com" "bar.foo.example.com") nil) 53 | 54 | (is (cookie-domain-p "example.com" "com") nil) 55 | (is (cookie-domain-p "example.com" "example.com") t) 56 | (is (cookie-domain-p "example.com" "foo.example.com") nil) 57 | (is (cookie-domain-p "example.com" "bar.foo.example.com") nil) 58 | 59 | (is (cookie-domain-p "foo.example.com" "com") nil) 60 | (is (cookie-domain-p "foo.example.com" "example.com") t) 61 | (is (cookie-domain-p "foo.example.com" "foo.example.com") t) 62 | (is (cookie-domain-p "foo.example.com" "bar.foo.example.com") nil) 63 | 64 | (is (cookie-domain-p "b.sapporo.jp" "jp") nil) 65 | (is (cookie-domain-p "b.sapporo.jp" "sapporo.jp") nil) 66 | (is (cookie-domain-p "b.sapporo.jp" "b.sapporo.jp") nil) 67 | (is (cookie-domain-p "b.sapporo.jp" "a.b.sapporo.jp") nil) 68 | 69 | (is (cookie-domain-p "b.c.sapporo.jp" "jp") nil) 70 | (is (cookie-domain-p "b.c.sapporo.jp" "sapporo.jp") nil) 71 | (is (cookie-domain-p "b.c.sapporo.jp" "c.sapporo.jp") nil) 72 | (is (cookie-domain-p "b.c.sapporo.jp" "b.c.sapporo.jp") t) 73 | (is (cookie-domain-p "b.c.sapporo.jp" "a.b.c.sapporo.jp") nil) 74 | 75 | (is (cookie-domain-p "b.c.d.sapporo.jp" "jp") nil) 76 | (is (cookie-domain-p "b.c.d.sapporo.jp" "sapporo.jp") nil) 77 | (is (cookie-domain-p "b.c.d.sapporo.jp" "d.sapporo.jp") nil) 78 | (is (cookie-domain-p "b.c.d.sapporo.jp" "c.d.sapporo.jp") t) 79 | (is (cookie-domain-p "b.c.d.sapporo.jp" "b.c.d.sapporo.jp") t) 80 | (is (cookie-domain-p "b.c.d.sapporo.jp" "a.b.c.d.sapporo.jp") nil) 81 | 82 | (is (cookie-domain-p "city.sapporo.jp" "jp") nil) 83 | (is (cookie-domain-p "city.sapporo.jp" "sapporo.jp") nil) 84 | (is (cookie-domain-p "city.sapporo.jp" "city.sapporo.jp") t) 85 | (is (cookie-domain-p "city.sapporo.jp" "a.city.sapporo.jp") nil) 86 | 87 | (is (cookie-domain-p "b.city.sapporo.jp" "jp") nil) 88 | (is (cookie-domain-p "b.city.sapporo.jp" "sapporo.jp") nil) 89 | (is (cookie-domain-p "b.city.sapporo.jp" "city.sapporo.jp") t) 90 | (is (cookie-domain-p "b.city.sapporo.jp" "b.city.sapporo.jp") t) 91 | (is (cookie-domain-p "b.city.sapporo.jp" "a.b.city.sapporo.jp") nil)) 92 | 93 | (finalize) 94 | -------------------------------------------------------------------------------- /src/encode.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.encode 3 | (:use :cl 4 | :quri.util) 5 | (:import-from :babel-encodings 6 | :*default-character-encoding*) 7 | (:export :url-encode 8 | :url-encode-params)) 9 | (in-package :quri.encode) 10 | 11 | (declaim (type (simple-array character (16)) +hexdigit-char+)) 12 | (defvar +hexdigit-char+ 13 | (let ((ary (make-array 16 :element-type 'character))) 14 | (loop for char across "0123456789ABCDEF" 15 | for i from 0 16 | do (setf (aref ary i) char)) 17 | ary)) 18 | 19 | (defun integer-to-hexdigit (byte) 20 | (declare (type (unsigned-byte 8) byte) 21 | (optimize (speed 3) (safety 0))) 22 | (let ((res (make-string 2))) 23 | (multiple-value-bind (quotient remainder) 24 | (floor byte 16) 25 | (setf (aref res 0) (aref +hexdigit-char+ quotient) 26 | (aref res 1) (aref +hexdigit-char+ remainder))) 27 | res)) 28 | 29 | (defun unreservedp (byte) 30 | (declare (type (unsigned-byte 8) byte) 31 | (optimize (speed 3) (safety 0))) 32 | (or (<= (char-code #\A) byte (char-code #\Z)) 33 | (<= (char-code #\a) byte (char-code #\z)) 34 | (<= (char-code #\0) byte (char-code #\9)) 35 | #.`(or ,@(loop for char across "-._~" 36 | collect `(= byte ,(char-code char)))))) 37 | 38 | (declaim (type (simple-array string (97)) +byte-to-string+)) 39 | (defvar +byte-to-string+ 40 | (let ((ary (make-array 97 :element-type 'string :initial-element ""))) 41 | (loop for i from 0 to 96 42 | unless (unreservedp i) 43 | do (setf (aref ary i) (integer-to-hexdigit i))) 44 | ary)) 45 | 46 | (defun url-encode (data &key 47 | (encoding babel-encodings:*default-character-encoding*) 48 | (start 0) 49 | end 50 | space-to-plus) 51 | (declare (type (or string simple-byte-vector) data) 52 | (type integer start) 53 | (optimize (speed 3) (safety 2))) 54 | (let* ((octets (if (stringp data) 55 | (babel:string-to-octets data :encoding encoding :start start :end end) 56 | data)) 57 | (res (make-array (* (length octets) 3) :element-type 'character :fill-pointer t)) 58 | (i 0)) 59 | (declare (type simple-byte-vector octets) 60 | (type string res) 61 | (type integer i)) 62 | (loop for byte of-type (unsigned-byte 8) across octets do 63 | (cond 64 | ((and space-to-plus 65 | (= byte #.(char-code #\Space))) 66 | (setf (aref res i) #\+) 67 | (incf i)) 68 | ((< byte #.(char-code #\a)) 69 | (locally (declare (optimize (speed 3) (safety 0))) 70 | (let ((converted (aref +byte-to-string+ byte))) 71 | (if (zerop (length converted)) 72 | (progn 73 | (setf (aref res i) (code-char byte)) 74 | (incf i)) 75 | (progn 76 | (setf (aref res i) #\%) 77 | (incf i) 78 | (replace res converted :start1 i) 79 | (incf i 2)))))) 80 | ((unreservedp byte) 81 | (setf (aref res i) (code-char byte)) 82 | (incf i)) 83 | (t 84 | (setf (aref res i) #\%) 85 | (incf i) 86 | (replace res (integer-to-hexdigit byte) :start1 i) 87 | (incf i 2)))) 88 | (setf (fill-pointer res) i) 89 | res)) 90 | 91 | (defun url-encode-params (params-alist 92 | &key 93 | (encoding babel-encodings:*default-character-encoding*) 94 | space-to-plus 95 | (percent-encode t)) 96 | (declare (optimize (speed 3))) 97 | (check-type params-alist list) 98 | (flet ((maybe-encode (string) 99 | (if percent-encode 100 | (url-encode string 101 | :encoding encoding 102 | :space-to-plus space-to-plus) 103 | string))) 104 | (with-output-to-string (s) 105 | (loop for ((field . value) . rest) on params-alist do 106 | (write-string (maybe-encode field) s) 107 | (when value 108 | (write-char #\= s) 109 | (check-type value (or string number simple-byte-vector)) 110 | (write-string (maybe-encode 111 | (if (numberp value) 112 | (with-standard-io-syntax 113 | (write-to-string value)) 114 | value)) 115 | s)) 116 | (when rest 117 | (write-char #\& s)))))) 118 | -------------------------------------------------------------------------------- /src/util.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.util 3 | (:use :cl) 4 | (:import-from :alexandria 5 | :with-gensyms) 6 | (:export :simple-byte-vector 7 | :standard-alpha-char-p 8 | :standard-alpha-byte-p 9 | :standard-alphanumeric-p 10 | :standard-alphanumeric-byte-p 11 | :with-array-parsing 12 | :with-string-parsing 13 | :with-byte-array-parsing 14 | :redo 15 | :gonext 16 | :goto)) 17 | (in-package :quri.util) 18 | 19 | (deftype simple-byte-vector (&optional (len '*)) `(simple-array (unsigned-byte 8) (,len))) 20 | 21 | (defun standard-alpha-char-p (char) 22 | (declare (type character char) 23 | (optimize (speed 3) (safety 0))) 24 | (standard-alpha-byte-p (char-code char))) 25 | 26 | (defun standard-alpha-byte-p (byte) 27 | (declare (type (unsigned-byte 8) byte) 28 | (optimize (speed 3) (safety 0))) 29 | (or (<= #.(char-code #\A) byte #.(char-code #\Z)) 30 | (<= #.(char-code #\a) byte #.(char-code #\z)))) 31 | 32 | (defun standard-alphanumeric-p (char) 33 | (declare (type character char) 34 | (optimize (speed 3) (safety 0))) 35 | (or (digit-char-p char) 36 | (standard-alpha-char-p char))) 37 | 38 | (defun standard-alphanumeric-byte-p (byte) 39 | (declare (type (unsigned-byte 8) byte) 40 | (optimize (speed 3) (safety 0))) 41 | (or (<= #.(char-code #\0) byte #.(char-code #\9)) 42 | (standard-alpha-byte-p byte))) 43 | 44 | (define-condition parsing-end-unexpectedly (simple-error) 45 | ((state :initarg :state 46 | :initform nil)) 47 | (:report (lambda (condition stream) 48 | (format stream "Parsing ended unexpectedly~:[~;~:* at ~A~]" 49 | (slot-value condition 'state))))) 50 | 51 | (define-condition no-next-state (simple-error) ()) 52 | 53 | (defmacro with-string-parsing ((elem p seq &optional (start 0) end key) &body body) 54 | `(let ((,elem #\Nul)) 55 | (declare (type character ,elem)) 56 | (%with-array-parsing (,elem ,p ,seq ,start ,end ,key) ,@body))) 57 | 58 | (defmacro with-byte-array-parsing ((elem p seq &optional (start 0) end key) &body body) 59 | `(let ((,elem 0)) 60 | (declare (type (unsigned-byte 8) ,elem)) 61 | (%with-array-parsing (,elem ,p ,seq ,start ,end ,key) ,@body))) 62 | 63 | (defmacro with-array-parsing ((elem p seq &optional (start 0) end key) &body body) 64 | `(let (,elem) 65 | (%with-array-parsing (,elem ,p ,seq ,start ,end ,key) ,@body))) 66 | 67 | (defmacro %with-array-parsing ((elem p seq &optional (start 0) end key) &body body) 68 | (with-gensyms (g-end no-next-state last key-fn) 69 | (let ((eof-exists nil)) 70 | `(let (,@(and key `((,key-fn ,key))) 71 | (,p ,start) 72 | (,g-end (locally (declare #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)) 73 | (or ,end (length ,seq))))) 74 | (declare (ignorable ,p ,g-end)) 75 | ,@(loop for (exp . rest) on body 76 | while (and (listp exp) (eq (car exp) 'declare)) 77 | collect exp 78 | do (setq body rest)) 79 | (macrolet ((goto (tag &optional (amount 1)) 80 | `(locally (declare (optimize (speed 3) (safety 0))) 81 | (incf ,',p ,amount) 82 | ,@(if (eql amount 0) 83 | () 84 | `((when (= ,',p ,',g-end) 85 | (go :eof)) 86 | (setq ,',elem 87 | ,',(if key 88 | `(if ,key-fn 89 | (funcall ,key-fn (aref ,seq ,p)) 90 | (aref ,seq ,p)) 91 | `(aref ,seq ,p))))) 92 | (go ,tag)))) 93 | (tagbody 94 | (when (= ,p ,g-end) 95 | (go :eof)) 96 | (locally (declare (optimize (speed 3) (safety 0))) 97 | (setq ,elem ,@(if key 98 | `((if ,key-fn 99 | (funcall ,key-fn (aref ,seq ,p)) 100 | (aref ,seq ,p))) 101 | `((aref ,seq ,p))))) 102 | ,@(loop for (tagpart . rest) on body 103 | for (tag . part) = tagpart 104 | if (eq tag :eof) 105 | append (progn 106 | (setf eof-exists t) 107 | `(,@tagpart 108 | (go ,last))) 109 | else 110 | append 111 | (list tag 112 | `(macrolet ((redo (&optional (amount 1)) 113 | `(goto ,',tag ,amount)) 114 | (gonext (&optional (amount 1)) 115 | `(goto ,',(or (caar rest) no-next-state) 116 | ,amount))) 117 | ,@part 118 | (error 'parsing-end-unexpectedly :state ',tag)))) 119 | 120 | ,no-next-state 121 | (error 'no-next-state) 122 | 123 | ,@(if eof-exists 124 | () 125 | '(:eof)) 126 | 127 | ,last)))))) 128 | -------------------------------------------------------------------------------- /src/decode.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.decode 3 | (:use :cl 4 | :quri.util 5 | :quri.error) 6 | (:import-from :babel 7 | :octets-to-string) 8 | (:import-from :babel-encodings 9 | :*default-character-encoding*) 10 | (:import-from :cl-utilities 11 | :collecting 12 | :collect) 13 | (:export :url-decode 14 | :url-decode-params)) 15 | (in-package :quri.decode) 16 | 17 | (declaim (ftype (function (character) (unsigned-byte 4)) hexdigit-to-integer)) 18 | (defun hexdigit-to-integer (char) 19 | (declare (type character char) 20 | (optimize (speed 3) (safety 0))) 21 | (let ((code (char-code char))) 22 | (declare (type fixnum code)) 23 | (cond 24 | ((<= #.(char-code #\0) code #.(char-code #\9)) 25 | (- code #.(char-code #\0))) 26 | ((<= #.(char-code #\A) code #.(char-code #\F)) 27 | (- code #.(- (char-code #\A) 10))) 28 | ((<= #.(char-code #\a) code #.(char-code #\f)) 29 | (- code #.(- (char-code #\a) 10))) 30 | (t (error 'url-decoding-error))))) 31 | 32 | (defun url-decode (data &key 33 | (encoding babel-encodings:*default-character-encoding*) 34 | (start 0) 35 | end 36 | (lenient nil)) 37 | (declare (type (or string simple-byte-vector) data) 38 | (type integer start) 39 | (optimize (speed 3) (safety 2))) 40 | (let* ((end (or end (length data))) 41 | (buffer (make-array (- end start) 42 | :element-type '(unsigned-byte 8))) 43 | (i 0) 44 | parsing-encoded-part) 45 | (declare (type integer end i) 46 | (type simple-byte-vector buffer)) 47 | (flet ((write-to-buffer (byte) 48 | (declare (optimize (speed 3) (safety 0))) 49 | (setf (aref buffer i) byte) 50 | (incf i))) 51 | (with-array-parsing (char p data start end (and (not (stringp data)) 52 | #'code-char)) 53 | (parsing 54 | (cond 55 | ((char= char #\%) 56 | (gonext)) 57 | ((char= char #\+) 58 | (write-to-buffer #.(char-code #\Space)) 59 | (redo)) 60 | (t 61 | (write-to-buffer (char-code char)) 62 | (redo)))) 63 | 64 | (parsing-encoded-part 65 | (setq parsing-encoded-part char) 66 | (gonext)) 67 | 68 | (parsing-encoded-part-second 69 | (handler-bind ((url-decoding-error 70 | (lambda (error) 71 | (declare (ignore error)) 72 | (when lenient 73 | (write-to-buffer #.(char-code #\%)) 74 | (write-to-buffer (char-code parsing-encoded-part)) 75 | (write-to-buffer (char-code char)) 76 | (setq parsing-encoded-part nil) 77 | (goto parsing))))) 78 | (write-to-buffer 79 | (+ (* 16 (hexdigit-to-integer parsing-encoded-part)) 80 | (hexdigit-to-integer char)))) 81 | (setq parsing-encoded-part nil) 82 | (goto parsing)) 83 | 84 | (:eof 85 | (when parsing-encoded-part 86 | (error 'url-decoding-error))))) 87 | (babel:octets-to-string buffer :end i :encoding encoding :errorp (not lenient)))) 88 | 89 | (defun url-decode-params (data &key 90 | (delimiter #\&) 91 | (encoding babel-encodings:*default-character-encoding*) 92 | (start 0) 93 | end 94 | (lenient nil) 95 | (percent-decode t)) 96 | (declare (type (or string simple-byte-vector) data) 97 | (type integer start) 98 | (type character delimiter) 99 | (optimize (speed 3) (safety 2))) 100 | (let ((end (or end (length data))) 101 | (start-mark nil) 102 | (=-mark nil)) 103 | (declare (type integer end)) 104 | (collecting 105 | (labels ((maybe-decode (string encoding start end) 106 | (if percent-decode 107 | (url-decode string 108 | :encoding encoding 109 | :start start 110 | :end end 111 | :lenient lenient) 112 | (subseq string start end))) 113 | (collect-pair (p) 114 | (tagbody 115 | (handler-bind ((url-decoding-error 116 | (lambda (error) 117 | (declare (ignore error)) 118 | (when lenient 119 | (go continue))))) 120 | (collect 121 | (cons (maybe-decode data encoding start-mark =-mark) 122 | (maybe-decode data encoding (1+ =-mark) p)))) 123 | continue) 124 | (setq start-mark nil 125 | =-mark nil)) 126 | (collect-field (p) 127 | (tagbody 128 | (handler-bind ((url-decoding-error 129 | (lambda (error) 130 | (declare (ignore error)) 131 | (when lenient 132 | (go continue))))) 133 | (collect 134 | (cons (maybe-decode data encoding start-mark p) 135 | nil))) 136 | continue) 137 | (setq start-mark nil))) 138 | (with-array-parsing (char p data start end (and (not (stringp data)) 139 | #'code-char)) 140 | (start 141 | (setq start-mark p) 142 | (if lenient 143 | (cond 144 | ((char= char #\=) 145 | (setq =-mark p) 146 | (goto parsing-value)) 147 | ((char= char delimiter) 148 | (redo))) 149 | (when (or (char= char #\=) 150 | (char= char delimiter)) 151 | (error 'uri-malformed-urlencoded-string))) 152 | (gonext)) 153 | 154 | (parsing-field 155 | (cond 156 | ((char= char #\=) 157 | (setq =-mark p) 158 | (gonext)) 159 | ((char= char delimiter) 160 | ;; field only 161 | (collect-field p) 162 | (goto start))) 163 | (redo)) 164 | 165 | (parsing-value 166 | (cond 167 | ((char= char #\=) 168 | (unless lenient 169 | (error 'uri-malformed-urlencoded-string))) 170 | ((char= char delimiter) 171 | (collect-pair p) 172 | (goto start))) 173 | (redo)) 174 | 175 | (:eof 176 | (cond 177 | (=-mark (collect-pair p)) 178 | (start-mark (collect-field p))))))))) 179 | -------------------------------------------------------------------------------- /src/domain.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.domain 3 | (:use :cl) 4 | (:import-from :quri.uri 5 | :uri-host) 6 | (:import-from :quri.etld 7 | :parse-domain) 8 | (:import-from :alexandria 9 | :xor 10 | :ends-with-subseq 11 | :length= 12 | :when-let) 13 | (:import-from :split-sequence 14 | :split-sequence) 15 | (:export :ipv4-addr-p 16 | :ipv6-addr-p 17 | :ip-addr-p 18 | :ip-addr= 19 | :uri-tld 20 | :uri-domain 21 | :cookie-domain-p)) 22 | (in-package :quri.domain) 23 | 24 | (defun uri-tld (uri) 25 | (let ((host (uri-host uri))) 26 | (when (and host 27 | (not (ip-addr-p host))) 28 | (let ((pos (position #\. host :from-end t))) 29 | (if pos 30 | (subseq host (1+ pos)) 31 | host))))) 32 | 33 | (defun uri-domain (uri) 34 | (let ((host (uri-host uri))) 35 | (when (and host 36 | (not (ip-addr-p host))) 37 | (parse-domain host)))) 38 | 39 | (defun ipv4-addr-p (host) 40 | (declare (optimize (speed 3) (safety 2)) 41 | #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)) 42 | (check-type host string) 43 | (flet ((read-byte-string (string start) 44 | (declare (type fixnum start)) 45 | (when (<= (length string) start) 46 | (return-from read-byte-string nil)) 47 | (let* ((end (+ start 2)) 48 | (endp (<= (1- (length string)) end)) 49 | (end (if endp 50 | (1- (length string)) 51 | end)) 52 | (res 0)) 53 | (declare (type fixnum end res)) 54 | (do ((i start (1+ i))) 55 | ((< end i)) 56 | (declare (type fixnum i)) 57 | (unless (char<= #\0 (aref string i) #\9) 58 | (return-from read-byte-string 59 | (if (= i start) 60 | nil 61 | (values res i nil)))) 62 | (setq res 63 | (+ (* res 10) 64 | (- (char-code (aref string i)) 48)))) 65 | (cond 66 | (endp 67 | (values res end t)) 68 | ((char= (aref string (1+ end)) #\.) 69 | (values res (1+ end) nil)))))) 70 | (let ((start 0)) 71 | (dotimes (i 4 t) 72 | (multiple-value-bind (byte pos endp) 73 | (read-byte-string host start) 74 | (unless (typep byte '(unsigned-byte 8)) 75 | (return nil)) 76 | (unless (xor endp (not (= i 3))) 77 | (return nil)) 78 | (setq start (1+ pos))))))) 79 | 80 | (defun trim-brackets (host) 81 | (if (char= (aref host 0) #\[) 82 | (if (char= (aref host (1- (length host))) #\]) 83 | (subseq host 1 (1- (length host))) 84 | nil) 85 | host)) 86 | 87 | (defun ipv6-addr-p (host) 88 | (declare (optimize (speed 3) (safety 2)) 89 | #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)) 90 | (check-type host string) 91 | (when (= (length host) 0) 92 | (return-from ipv6-addr-p nil)) 93 | 94 | (labels ((read-section (string start &optional read-colons) 95 | (declare (type string string) 96 | (type fixnum start)) 97 | (when (<= (length string) start) 98 | (return-from read-section 99 | (values start read-colons t))) 100 | (when (char= (aref string start) #\:) 101 | (cond 102 | ((<= (length string) (1+ start)) 103 | (return-from read-section nil)) 104 | ((char= (aref string (1+ start)) #\:) 105 | (if read-colons 106 | (return-from read-section nil) 107 | (return-from read-section (read-section string (+ 2 start) t)))) 108 | (t (incf start)))) 109 | (let* ((end (+ start 4)) 110 | (endp (<= (length string) end)) 111 | (end (if endp 112 | (length string) 113 | end))) 114 | (declare (type fixnum end)) 115 | 116 | (do ((i start (1+ i))) 117 | ((= end i)) 118 | (let ((ch (aref string i))) 119 | (cond 120 | ((char= ch #\:) 121 | (return-from read-section 122 | (values i read-colons nil))) 123 | ((or (char<= #\0 ch #\9) 124 | (char<= #\a ch #\f) 125 | (char<= #\A ch #\F))) 126 | (t (return-from read-section nil))))) 127 | 128 | (if endp 129 | (values end read-colons endp) 130 | (if (char= (aref string end) #\:) 131 | (values end read-colons endp) 132 | nil))))) 133 | 134 | (setq host (trim-brackets host)) 135 | (unless host 136 | (return-from ipv6-addr-p nil)) 137 | 138 | (let ((start 0) 139 | (read-colons-p nil)) 140 | (dotimes (i 8 t) 141 | (multiple-value-bind (e read-colons endp) 142 | (read-section host start read-colons-p) 143 | (unless e 144 | (return-from ipv6-addr-p nil)) 145 | (when endp 146 | (when (and (not (= i 7)) 147 | (not read-colons)) 148 | (return-from ipv6-addr-p nil)) 149 | (return-from ipv6-addr-p t)) 150 | (when (and (= i 7) (not endp)) 151 | (return-from ipv6-addr-p nil)) 152 | (setq start e 153 | read-colons-p read-colons)))))) 154 | 155 | (defun ip-addr-p (host) 156 | (or (ipv4-addr-p host) 157 | (ipv6-addr-p host))) 158 | 159 | (defun ip-addr= (ip1 ip2) 160 | (flet ((parse-ipv6 (ip) 161 | (setq ip (trim-brackets ip)) 162 | (cond 163 | ((char= (aref ip 0) #\:) 164 | (setq ip (concatenate 'string "0" ip))) 165 | ((char= (aref ip (1- (length ip))) #\:) 166 | (setq ip (concatenate 'string ip "0")))) 167 | (let* ((ip-parsed (split-sequence #\: ip)) 168 | (len (length ip-parsed))) 169 | (loop for section in ip-parsed 170 | if (string= section "") 171 | append (make-list (- 9 len) :initial-element 0) 172 | else 173 | collect (parse-integer section :radix 16))))) 174 | (cond 175 | ((ipv4-addr-p ip1) 176 | (string= ip1 ip2)) 177 | ((ipv6-addr-p ip1) 178 | (and (ipv6-addr-p ip2) 179 | (equal (parse-ipv6 ip1) 180 | (parse-ipv6 ip2))))))) 181 | 182 | (defun cookie-domain-p (domain cookie-domain) 183 | (unless cookie-domain 184 | (return-from cookie-domain-p t)) 185 | (if (ip-addr-p domain) 186 | (ip-addr= domain cookie-domain) 187 | (progn 188 | ;; ignore the preceding "." 189 | (when (char= (aref cookie-domain 0) #\.) 190 | (setq cookie-domain (subseq cookie-domain 1))) 191 | (when-let (registered-domain (parse-domain domain)) 192 | (cond 193 | ((length= registered-domain cookie-domain) 194 | (string= registered-domain cookie-domain)) 195 | ((length= domain cookie-domain) 196 | (string= domain cookie-domain)) 197 | (t (and (ends-with-subseq domain cookie-domain) 198 | (char= #\. 199 | (aref cookie-domain (- (length cookie-domain) 200 | (length registered-domain))))))))))) 201 | -------------------------------------------------------------------------------- /t/quri.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri-test 3 | (:use :cl 4 | :quri :quri.uri 5 | :prove)) 6 | (in-package :quri-test) 7 | 8 | (plan nil) 9 | 10 | (defun common-uri-equivalences-assertions () 11 | (is (make-uri :scheme "http" :host "b.hatena.ne.jp" :port 80 :path "/path") 12 | (make-uri :scheme "http" :host "b.hatena.ne.jp" :port 80 :path "/path") 13 | "Same scheme, host, port, path query and fragment.") 14 | (isnt (make-uri :scheme "http" :host "b.hatena.ne.jp" :port 80 :path "/path") 15 | (make-uri :scheme "https" :host "b.hatena.ne.jp" :port 80 :path "/path") 16 | "Differ by scheme (distinct struct type).") 17 | (isnt (make-uri :scheme "foo" :host "b.hatena.ne.jp" :port 80 :path "/path") 18 | (make-uri :scheme "bar" :host "b.hatena.ne.jp" :port 80 :path "/path") 19 | "Differ by scheme (same struct type).") 20 | (isnt (make-uri :scheme "http" :host "b.hatena.ne.jp" :port 80) 21 | (make-uri :scheme "http" :host "b.hatena.ne.jp" :port 81) 22 | "Differ by port.") 23 | (isnt (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "/path") 24 | (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "/path" :query "?") 25 | "Differ by query.") 26 | (isnt (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "/path") 27 | (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "/path" :fragment "bar") 28 | "Differ by fragment.") 29 | (is (make-uri :scheme "http" :host "b.hatena.ne.jp") 30 | (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "") 31 | "The NIL and empty string path are equivalent.") 32 | (is (make-basic-uri :path "foo") 33 | (make-basic-uri :path #p"foo") 34 | "coerce cl:pathname") 35 | (is (make-uri-file :path "foo") 36 | (make-uri-file :path #p"foo") 37 | "coerce cl:pathname") 38 | (isnt (uri "https://google.com") 39 | ;; The "o" character is replaced by cyrillic small letter O. 40 | (uri "https://gооgle.com") 41 | "Prevent IDN homograph attack.")) 42 | 43 | (subtest "uri=" 44 | (let ((prove:*default-test-function* #'uri=)) 45 | (common-uri-equivalences-assertions) 46 | (isnt (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "/") 47 | (make-uri :scheme "http" :host "b.hatena.ne.jp") 48 | "The NIL and \"/\" path aren't equivalent.") 49 | (isnt (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "/") 50 | (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "") 51 | "The empty string and \"/\" path aren't equivalent.") 52 | #+todo 53 | (is (uri "mailto:Joe@Example.COM") 54 | (uri "mailto:Joe@example.com")) 55 | #+todo 56 | (is (uri "mailto:Postmaster@example.com") 57 | (uri "mailto:POSTMASTER@example.com")))) 58 | 59 | (subtest "uri-equal" 60 | (let ((prove:*default-test-function* #'uri-equal)) 61 | (common-uri-equivalences-assertions) 62 | (is (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "/") 63 | (make-uri :scheme "http" :host "b.hatena.ne.jp") 64 | "The NIL and \"/\" path are equivalent.") 65 | (is (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "/") 66 | (make-uri :scheme "http" :host "b.hatena.ne.jp" :path "") 67 | "The empty string and \"/\" path are equivalent."))) 68 | 69 | (defparameter *test-cases* 70 | '(("file:///tmp/junk.txt" . 71 | ("file" nil nil "/tmp/junk.txt" nil nil)) 72 | ("imap://mail.common-lisp.net/mbox1" . 73 | ("imap" nil "mail.common-lisp.net" "/mbox1" nil nil)) 74 | ("mms://wms.sys.hinet.net/cts/Drama/09006251100.asf" . 75 | ("mms" nil "wms.sys.hinet.net" "/cts/Drama/09006251100.asf" nil nil)) 76 | ("nfs://server/path/to/file.txt" . 77 | ("nfs" nil "server" "/path/to/file.txt" nil nil)) 78 | ("svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/" . 79 | ("svn+ssh" nil "svn.zope.org" "/repos/main/ZConfig/trunk/" nil nil)) 80 | ("git+ssh://git@github.com/user/project.git" . 81 | ("git+ssh" "git" "github.com" "/user/project.git" nil nil)) 82 | ("http://common-lisp.net" . 83 | ("http" nil "common-lisp.net" nil nil nil)) 84 | ("http://common-lisp.net#abc" . 85 | ("http" nil "common-lisp.net" nil nil "abc")) 86 | ("http://common-lisp.net?q=abc" . 87 | ("http" nil "common-lisp.net" nil "q=abc" nil)) 88 | ("http://common-lisp.net/#abc" . 89 | ("http" nil "common-lisp.net" "/" nil "abc")) 90 | ("http://a/b/c/d;p?q#f" . 91 | ("http" nil "a" "/b/c/d;p" "q" "f")) 92 | ("http" . 93 | (nil nil nil "http" nil nil)) 94 | ("http:" . 95 | ("http" nil nil nil nil nil)) 96 | ("ldap://[2001:db8::7]/c=GB?objectClass?one" . 97 | ("ldap" nil "[2001:db8::7]" "/c=GB" "objectClass?one" nil)) 98 | ("http://[dead:beef::]:/foo/" . 99 | ("http" nil "[dead:beef::]" "/foo/" nil nil)) 100 | ("tel:+31-641044153" . 101 | ("tel" nil nil "+31-641044153" nil nil)) 102 | ("http://" . 103 | ("http" nil nil nil nil nil)) 104 | ("foo:/a/b/c" . 105 | ("foo" nil nil "/a/b/c" nil nil)) 106 | ("foo::" . 107 | ("foo" nil nil ":" nil nil)) 108 | ("/" . 109 | (nil nil nil "/" nil nil)) 110 | ("foo:/" . 111 | ("foo" nil nil "/" nil nil)) 112 | ("//a/" . 113 | (nil nil "a" "/" nil nil)) 114 | ("//" . 115 | (nil nil nil nil nil nil)) 116 | ("///" . 117 | (nil nil nil "/" nil nil)) 118 | ("//foo/bar" . 119 | (nil nil "foo" "/bar" nil nil)))) 120 | 121 | (loop for (test-uri . params) in *test-cases* do 122 | (subtest (format nil "~A (string)" test-uri) 123 | (let ((uri (uri test-uri))) 124 | (is (uri-scheme uri) (nth 0 params) "scheme") 125 | (is (uri-userinfo uri) (nth 1 params) "userinfo") 126 | (is (uri-host uri) (nth 2 params) "host") 127 | (is (uri-path uri) (nth 3 params) "path") 128 | (is (uri-query uri) (nth 4 params) "query") 129 | (is (uri-fragment uri) (nth 5 params) "fragment"))) 130 | (subtest (format nil "~A (byte-vector)" test-uri) 131 | (let ((uri (uri (babel:string-to-octets test-uri)))) 132 | (is (uri-scheme uri) (nth 0 params) "scheme") 133 | (is (uri-userinfo uri) (nth 1 params) "userinfo") 134 | (is (uri-host uri) (nth 2 params) "host") 135 | (is (uri-path uri) (nth 3 params) "path") 136 | (is (uri-query uri) (nth 4 params) "query") 137 | (is (uri-fragment uri) (nth 5 params) "fragment"))) 138 | (subtest (format nil "~A (copy-uri)" test-uri) 139 | (let ((uri (uri test-uri))) 140 | (is uri (copy-uri uri) :test #'uri=)))) 141 | 142 | ;; May not be true in general, e.g.: 143 | ;; (render-uri (uri "///")) -> "/" 144 | ;; (render-uri (uri "http://")) -> "http:" 145 | (subtest "render-uri after uri is the identity function" 146 | (let ((url "file:///tmp/junk.txt?query#fragment")) 147 | (is url (render-uri (uri url)))) 148 | ;; Ensure it doesn't depend on *print-base*. 149 | (let* ((*print-base* 2) 150 | (url "//foo:80?a=5")) 151 | (is url (render-uri (uri url))))) 152 | 153 | (defparameter *base-uri* (uri "http://www.example.com/path/a/b.html")) 154 | 155 | (defparameter *merge-test-cases* 156 | `((,(uri "file:///tmp/junk.txt") . "file:///tmp/junk.txt") 157 | (,(make-uri :userinfo "auth" :host "secretplace.com") . "http://auth@secretplace.com") 158 | (,(make-uri :host "example.com" :path "/path" :query "query") . "http://example.com/path?query") 159 | (,(uri "/new/path") . "http://www.example.com/new/path") 160 | (,(uri "foo.txt") . "http://www.example.com/path/a/foo.txt") 161 | (,(uri "../bar") . "http://www.example.com/path/bar") 162 | (,(uri "other/./car") . "http://www.example.com/path/a/other/car") 163 | (,(uri "./../.") . "http://www.example.com/path/") 164 | (,(uri "/./foo") . "http://www.example.com/foo") 165 | (,(uri "/./foo/") . "http://www.example.com/foo/") 166 | (,(uri "/x/../y/") . "http://www.example.com/y/") 167 | (,(uri "/x/../../../y/") . "http://www.example.com/y/") 168 | (,(uri "foo://x/y/../z/") . "foo://x/z/") 169 | (,(make-uri :query "name=fukamachi") . "http://www.example.com/path/a/b.html?name=fukamachi") 170 | (,(make-uri :scheme "https" :host "foo.com" :path "foo/bar") . "https://foo.com/foo/bar") 171 | (,(uri "https://example.org/#/?b") . "https://example.org/#/?b") 172 | (,(uri "about:blank") . "about:blank"))) 173 | 174 | (loop for (test-uri . result-uri) in *merge-test-cases* do 175 | (let ((merged-uri (merge-uris test-uri *base-uri*))) 176 | (subtest "merge-uris" 177 | (is (render-uri merged-uri) result-uri :test 'string=)) 178 | (subtest "merge-uris type checking" 179 | (unless (uri-scheme test-uri) 180 | (is (symbol-name (type-of merged-uri)) 181 | (symbol-name (type-of *base-uri*))))))) 182 | 183 | (finalize) 184 | -------------------------------------------------------------------------------- /src/quri.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri 3 | (:use :cl 4 | :quri.uri 5 | :quri.uri.ftp 6 | :quri.uri.http 7 | :quri.uri.ldap 8 | :quri.uri.file 9 | :quri.error) 10 | (:import-from :quri.domain 11 | :uri-tld 12 | :uri-domain 13 | :ipv4-addr-p 14 | :ipv6-addr-p 15 | :ip-addr-p 16 | :ip-addr= 17 | :cookie-domain-p) 18 | (:import-from :quri.parser 19 | :parse-uri 20 | :parse-scheme 21 | :parse-authority 22 | :parse-path 23 | :parse-query 24 | :parse-fragment) 25 | (:import-from :quri.port 26 | :scheme-default-port) 27 | (:import-from :quri.decode 28 | :url-decode 29 | :url-decode-params) 30 | (:import-from :quri.encode 31 | :url-encode 32 | :url-encode-params) 33 | (:import-from :split-sequence :split-sequence) 34 | (:import-from :alexandria 35 | :delete-from-plist 36 | :when-let*) 37 | (:export :parse-uri 38 | :parse-scheme 39 | :parse-authority 40 | :parse-path 41 | :parse-query 42 | :parse-fragment 43 | 44 | :make-uri 45 | :uri 46 | :uri= 47 | :uri-equal 48 | :uri-p 49 | :uri-scheme 50 | :uri-userinfo 51 | :uri-host 52 | :uri-port 53 | :uri-path 54 | :uri-query 55 | :uri-fragment 56 | :uri-authority 57 | 58 | :uri-tld 59 | :uri-domain 60 | :ipv4-addr-p 61 | :ipv6-addr-p 62 | :ip-addr-p 63 | :ip-addr= 64 | :cookie-domain-p 65 | 66 | :urn 67 | :urn-p 68 | :urn-nid 69 | :urn-nss 70 | 71 | :make-uri-ftp 72 | :uri-ftp 73 | :uri-ftp-p 74 | :uri-ftp-typecode 75 | 76 | :make-uri-http 77 | :make-uri-https 78 | :uri-http 79 | :uri-http-p 80 | :uri-https 81 | :uri-https-p 82 | :uri-query-params 83 | 84 | :make-uri-ldap 85 | :make-uri-ldaps 86 | :uri-ldap 87 | :uri-ldap-p 88 | :uri-ldap-dn 89 | :uri-ldap-attributes 90 | :uri-ldap-scope 91 | :uri-ldap-filter 92 | :uri-ldap-extensions 93 | 94 | :make-uri-file 95 | :uri-file 96 | :uri-file-p 97 | :uri-file-pathname 98 | 99 | :copy-uri 100 | :render-uri 101 | :merge-uris 102 | 103 | :url-decode 104 | :url-decode-params 105 | :url-encode 106 | :url-encode-params 107 | 108 | :uri-error 109 | :uri-malformed-string 110 | :uri-invalid-port 111 | :url-decoding-error 112 | :uri-malformed-urlencoded-string)) 113 | (in-package :quri) 114 | 115 | (defun scheme-constructor (scheme) 116 | "Get a constructor function appropriate for the scheme." 117 | (cond 118 | ((string= scheme "http") #'make-uri-http) 119 | ((string= scheme "https") #'make-uri-https) 120 | ((string= scheme "ldap") #'make-uri-ldap) 121 | ((string= scheme "ldaps") #'make-uri-ldaps) 122 | ((string= scheme "ftp") #'make-uri-ftp) 123 | ((string= scheme "file") #'make-uri-file) 124 | ((string= scheme "urn") #'make-urn) 125 | (t #'make-basic-uri))) 126 | 127 | (defun uri (data &key (start 0) end) 128 | (if (uri-p data) 129 | data 130 | (multiple-value-bind (scheme userinfo host port path query fragment) 131 | (parse-uri data :start start :end end) 132 | (apply (scheme-constructor scheme) 133 | :scheme scheme 134 | :userinfo userinfo 135 | :host host 136 | :path path 137 | :query query 138 | :fragment fragment 139 | (and port 140 | `(:port ,port)))))) 141 | 142 | (defun copy-uri (uri &key (scheme (uri-scheme uri)) 143 | (userinfo (uri-userinfo uri)) 144 | (host (uri-host uri)) 145 | (port (uri-port uri)) 146 | (path (uri-path uri)) 147 | (query (uri-query uri)) 148 | (fragment (uri-fragment uri))) 149 | (make-uri :scheme scheme 150 | :userinfo userinfo 151 | :host host 152 | :port port 153 | :path path 154 | :query query 155 | :fragment fragment)) 156 | 157 | (defun make-uri (&rest initargs &key scheme userinfo host port path query fragment defaults) 158 | (declare (ignore userinfo host port path fragment)) 159 | (setf initargs (delete-from-plist initargs :defaults)) 160 | (if defaults 161 | (apply #'copy-uri (uri defaults) initargs) 162 | (progn 163 | (when (consp query) 164 | (setf (getf initargs :query) (url-encode-params query))) 165 | (apply (scheme-constructor scheme) initargs)))) 166 | 167 | (defun render-uri (uri &optional stream) 168 | (flet ((maybe-slash (authority path) 169 | (if (and (not (uiop:emptyp authority)) (not (uiop:emptyp path)) 170 | (char/= (uiop:last-char authority) #\/) 171 | (char/= (uiop:first-char path) #\/)) 172 | "/" 173 | ""))) 174 | (cond 175 | ((uri-ftp-p uri) 176 | (format stream 177 | "~@[~(~A~):~]~@[//~A~]~a~@[~A~]~@[;type=~A~]~@[?~A~]~@[#~A~]" 178 | (uri-scheme uri) 179 | (uri-authority uri) 180 | (maybe-slash (uri-authority uri) (uri-path uri)) 181 | (uri-path uri) 182 | (uri-ftp-typecode uri) 183 | (uri-query uri) 184 | (uri-fragment uri))) 185 | ((uri-file-p uri) 186 | (format stream 187 | "~@[~(~A~)://~]~@[~A~]~@[?~A~]~@[#~A~]" 188 | (uri-scheme uri) 189 | (uri-path uri) 190 | (uri-query uri) 191 | (uri-fragment uri))) 192 | (t 193 | (format stream 194 | "~@[~(~A~):~]~@[//~A~]~a~@[~A~]~@[?~A~]~@[#~A~]" 195 | (uri-scheme uri) 196 | (uri-authority uri) 197 | (maybe-slash (uri-authority uri) (uri-path uri)) 198 | (uri-path uri) 199 | (uri-query uri) 200 | (uri-fragment uri)))))) 201 | 202 | (defun %uri= (uri1 uri2 &key normalize-path-p) 203 | (check-type uri1 uri) 204 | (check-type uri2 uri) 205 | (flet ((%path (path) 206 | "Define path equivalence relations." 207 | (cond (normalize-path-p 208 | (if (or (null path) (equal path "")) 209 | "/" 210 | path)) 211 | (t 212 | (or path ""))))) 213 | (and (equal (uri-scheme uri1) (uri-scheme uri2)) 214 | (equal (%path (uri-path uri1)) (%path (uri-path uri2))) 215 | (equal (uri-query uri1) (uri-query uri2)) 216 | (equal (uri-fragment uri1) (uri-fragment uri2)) 217 | (equalp (uri-authority uri1) (uri-authority uri2)) 218 | (or (not (uri-ftp-p uri1)) 219 | (eql (uri-ftp-typecode uri1) (uri-ftp-typecode uri2)))))) 220 | 221 | (defun uri= (uri1 uri2) 222 | "Whether URI1 refers to the same URI as URI2. 223 | Paths are not normalized. See `uri-equal'." 224 | (%uri= uri1 uri2)) 225 | 226 | (defun uri-equal (uri1 uri2) 227 | "Whether URI1 refers to the same URI as URI2. 228 | Empty paths are normalized to '/' as per RFC 3986 229 | (https://tools.ietf.org/html/rfc3986#section-6.2.3). 230 | See `uri='." 231 | (%uri= uri1 uri2 :normalize-path-p t)) 232 | 233 | (defmethod print-object ((uri uri) stream) 234 | (if (and (null *print-readably*) (null *print-escape*)) 235 | (render-uri uri stream) 236 | (format stream "#<~S ~A>" 237 | (type-of uri) 238 | (render-uri uri)))) 239 | 240 | (defun merge-uri-paths (ref-path base-path) 241 | (declare (type (or string null) ref-path base-path)) 242 | (let* ((path-list (and base-path (nreverse (split-sequence #\/ base-path)))) 243 | (ref-components (and ref-path (split-sequence #\/ ref-path))) 244 | ending-slash-p) 245 | ;; remove last component of base 246 | (pop path-list) 247 | (dolist (component ref-components) 248 | (cond ((string= ".." component) 249 | (pop path-list) 250 | (setf ending-slash-p t)) 251 | ((string= "." component) 252 | (setf ending-slash-p t)) 253 | (t 254 | (push component path-list) 255 | (setf ending-slash-p nil)))) 256 | (setf path-list (nreverse path-list)) 257 | (with-output-to-string (s) 258 | (loop for (component . more) on path-list 259 | do (progn 260 | (write-string component s) 261 | (when (or more ending-slash-p) 262 | (write-char #\/ s))))))) 263 | 264 | (defun merge-uris (reference base) 265 | "Merge a reference URI into the base URI as described in RFC 2396 Section 5.2. 266 | The returned URI is always a new instance. Neither REFERENCE nor BASE is 267 | mutated." 268 | (let* ((reference (uri reference)) 269 | (base (uri base)) 270 | (merged-uri (copy-uri reference))) 271 | (declare (uri reference base)) 272 | ;; Steps described at 273 | ;; https://datatracker.ietf.org/doc/html/rfc2396#section-5.2 274 | ;; Step 1 is absent since it's implicit 275 | (flet ((return-merged-uri () (return-from merge-uris (uri merged-uri))) 276 | (merge-paths () (setf (uri-path merged-uri) 277 | (merge-uri-paths (uri-path merged-uri) nil)))) 278 | ;; Step 2 279 | (when (uri-equal reference base) 280 | (return-merged-uri)) 281 | ;; Step 3 282 | (when (uri-scheme merged-uri) 283 | (merge-paths) 284 | (return-merged-uri)) 285 | (setf merged-uri (copy-uri merged-uri :scheme (uri-scheme base))) 286 | ;; Step 4 287 | (when (null (uri-port merged-uri)) 288 | (setf (uri-port merged-uri) (scheme-default-port (uri-scheme merged-uri)))) 289 | (when (uri-host merged-uri) 290 | (merge-paths) 291 | (return-merged-uri)) 292 | (setf (uri-userinfo merged-uri) (uri-userinfo base)) 293 | (setf (uri-host merged-uri) (uri-host base)) 294 | (setf (uri-port merged-uri) (uri-port base)) 295 | ;; Step 5 296 | (when (null (uri-path merged-uri)) 297 | (setf (uri-path merged-uri) (uri-path base)) 298 | (return-merged-uri)) 299 | ;; Step 6 300 | (alexandria:when-let* ((p (uri-path merged-uri)) 301 | (first-char (and (> (length p) 0) (char p 0))) 302 | (_ (char= #\/ first-char))) 303 | (merge-paths) 304 | (return-merged-uri)) 305 | ;; Step 7 306 | (setf (uri-path merged-uri) 307 | (merge-uri-paths (uri-path merged-uri) (uri-path base))) 308 | (return-merged-uri)))) 309 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # QURI 2 | 3 | [![Build Status](https://travis-ci.org/fukamachi/quri.svg?branch=master)](https://travis-ci.org/fukamachi/quri) 4 | [![Coverage Status](https://coveralls.io/repos/fukamachi/quri/badge.svg?branch=master)](https://coveralls.io/r/fukamachi/quri) 5 | 6 |

冷やしきゅうり

7 |

Photo by m-louis, licensed under the CC BY-SA 2.0 license.

8 | 9 | **QURI** (pronounced "Q-ree") is yet another URI library for Common Lisp. It is intended to be a replacement of [PURI](http://puri.kpe.io/). 10 | 11 | It aims at implementing [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986). 12 | Behaviour that deviates from it should be considered a bug; please report. 13 | 14 | ## Differences from PURI 15 | 16 | - Fast. (See [Benchmark](#benchmark).) 17 | - Doesn't encode/decode URL implicitly. 18 | - UTF-8 characters support. 19 | - Supports userinfo. (Example: `git` in `git@github.com`) 20 | - Supports IPv6 hostname. (Example: `ldap://[2001:db8::7]/`) 21 | - Allows byte vectors as input. 22 | - Takes optional `:start` and `:end` keyword arguments. 23 | - Low-level parser functions. 24 | - URL encoding/decoding utilities. 25 | - `url-decode` 26 | - `url-decode-params` 27 | - `url-encode` 28 | - `url-encode-params` 29 | 30 | ## Usage 31 | 32 | ```common-lisp 33 | (use-package :quri) 34 | 35 | (defvar *uri* (uri "http://www.ics.uci.edu/pub/ietf/uri/#Related")) 36 | 37 | *uri* 38 | ;=> # 39 | 40 | (uri-scheme *uri*) 41 | ;=> "http" 42 | 43 | (uri-host *uri*) 44 | ;=> "www.ics.uci.edu" 45 | 46 | (uri-domain *uri*) 47 | ;=> "uci.edu" 48 | 49 | (uri-path *uri*) 50 | ;=> "/pub/ietf/uri/" 51 | 52 | (uri-fragment *uri*) 53 | ;=> "Related" 54 | ``` 55 | 56 | ## Functions 57 | 58 | ### \[Function] uri 59 | 60 | Parse a string or a byte vector and return a `uri` object. 61 | 62 | ### \[Function] make-uri 63 | 64 | Create a `uri` object. 65 | 66 | ```common-lisp 67 | (make-uri :scheme "http" 68 | :host "8arrow.org" 69 | :path "/") 70 | ;=> # 71 | 72 | (make-uri :defaults "http://8arrow.org" 73 | :query '(("guest" . 1))) 74 | ;=> # 75 | ``` 76 | 77 | ### \[Function] copy-uri 78 | 79 | Return a copy of the given `uri` object. 80 | 81 | ### \[Function] merge-uris 82 | 83 | Merge a reference URI into the base URI as described in RFC 2396 Section 5.2. The returned URI may or may not be a new instance. Neither REFERENCE nor BASE is mutated. 84 | 85 | ### \[Structure] uri 86 | 87 | Structure class as a representation of URIs. The following methods are available for all classes extends this class. 88 | 89 | #### Methods 90 | 91 | - `uri-scheme` 92 | - `uri-userinfo` 93 | - `uri-host` 94 | - `uri-domain` 95 | - `uri-tld` 96 | - `uri-port` 97 | - `uri-path` 98 | - `uri-authority` 99 | - `render-uri` 100 | 101 | ### \[Structure] urn (extends uri) 102 | 103 | Structure class as a representation of URNs. All methods of `uri` are also available for this class. 104 | 105 | #### Methods 106 | 107 | - `urn-nid` 108 | - `urn-nss` 109 | 110 | ### \[Structure] uri-http (extends uri) 111 | 112 | Structure class for HTTP/HTTPS URIs. 113 | 114 | #### Methods 115 | 116 | - `uri-query-params` 117 | 118 | ```common-lisp 119 | (defvar *uri* (quri:uri "http://quickdocs.org/search?q=web")) 120 | 121 | (uri-query-params *uri*) 122 | ;=> (("q" . "web")) 123 | 124 | (setf (uri-query-params *uri*) '(("q" . "system programming"))) 125 | 126 | *uri* 127 | ;=> # 128 | ``` 129 | 130 | ### \[Structure] uri-ftp (extends uri) 131 | 132 | Structure class for FTP URIs. 133 | 134 | #### Methods 135 | 136 | - `uri-ftp-typecode` 137 | 138 | ### \[Structure] uri-ldap (extends uri) 139 | 140 | Structure class for LDAP/LDAPS URIs. 141 | 142 | #### Methods 143 | 144 | - `uri-ldap-dn` 145 | - `uri-ldap-attributes` 146 | - `uri-ldap-scope` 147 | - `uri-ldap-filter` 148 | - `uri-ldap-extensions` 149 | 150 | ### \[Function] url-decode 151 | 152 | Decode a Percent-Encoded string or byte vector. 153 | 154 | ```common-lisp 155 | (url-decode "%2Ffoo%E3%81%82") 156 | ;=> "/fooあ" 157 | ``` 158 | 159 | ### \[Function] url-decode-params 160 | 161 | Decode a [form-urlencoded](http://tools.ietf.org/html/rfc1866#section-8.2.1) string or byte vector and return an association list. 162 | 163 | ### \[Function] url-encode 164 | 165 | Encode a string or a byte vector into a Percent-Encoded string. 166 | 167 | ```common-lisp 168 | (url-encode "/fooあ") 169 | ;=> "%2Ffoo%E3%81%82" 170 | ``` 171 | 172 | ### \[Function] url-encode-params 173 | 174 | Encode an association list into a [form-urlencoded](http://tools.ietf.org/html/rfc1866#section-8.2.1) string. 175 | 176 | ## Low-level functions 177 | 178 | ### \[Function] parse-uri 179 | 180 | Parse a URI string or a URI byte vector and return 7 URI components -- scheme, userinfo, host name, port, path, query and fragment. 181 | 182 | ```common-lisp 183 | (parse-uri "http://www.ics.uci.edu/pub/ietf/uri/#Related") 184 | ;=> "http" 185 | ; NIL 186 | ; "www.ics.uci.edu" 187 | ; NIL 188 | ; "/pub/ietf/uri/" 189 | ; NIL 190 | ; "Related" 191 | ``` 192 | 193 | ## Installation 194 | 195 | ``` 196 | $ git clone https://github.com/fukamachi/quri 197 | ``` 198 | 199 | ```common-lisp 200 | (ql:quickload :quri) 201 | ``` 202 | 203 | ## Benchmark 204 | 205 | ### Parsing URI 206 | 207 | - Parsing a URI string 100,000 times. 208 | 209 | | QURI | PURI | 210 | |--------|--------| 211 | | 0.064s | 0.423s | 212 | 213 | QURI is **6.6 times faster** than PURI for URI parsing. 214 | 215 | #### QURI 216 | 217 | ```common-lisp 218 | (time 219 | (dotimes (i 100000) 220 | (quri:uri "http://www.ics.uci.edu/pub/ietf/uri/#Related"))) 221 | ``` 222 | 223 | ``` 224 | Evaluation took: 225 | 0.064 seconds of real time 226 | 0.063984 seconds of total run time (0.063745 user, 0.000239 system) 227 | 100.00% CPU 228 | 191,340,531 processor cycles 229 | 28,807,728 bytes consed 230 | ``` 231 | 232 | #### PURI 233 | 234 | ```common-lisp 235 | (time 236 | (dotimes (i 100000) 237 | (puri:uri "http://www.ics.uci.edu/pub/ietf/uri/#Related"))) 238 | ``` 239 | 240 | ``` 241 | Evaluation took: 242 | 0.423 seconds of real time 243 | 0.425327 seconds of total run time (0.422234 user, 0.003093 system) 244 | [ Run times consist of 0.004 seconds GC time, and 0.422 seconds non-GC time. ] 245 | 100.47% CPU 246 | 1,266,663,894 processor cycles 247 | 64,001,408 bytes consed 248 | ``` 249 | 250 | ### URL decoding 251 | 252 | - Decoding a URL-encoded string 100,000 times. 253 | 254 | | QURI | Hunchentoot | do-urlencode | 255 | |--------|-------------|--------------| 256 | | 0.029s | 0.089s | 0.634s | 257 | 258 | QURI is **3 times faster** than Hunchentoot, and **21.8 times faster** than do-urlencode. 259 | 260 | #### QURI 261 | 262 | ```common-lisp 263 | (time 264 | (dotimes (i 100000) 265 | (quri:url-decode "foo%E3%81%82"))) 266 | ``` 267 | 268 | ``` 269 | Evaluation took: 270 | 0.029 seconds of real time 271 | 0.028683 seconds of total run time (0.027934 user, 0.000749 system) 272 | 100.00% CPU 273 | 85,421,676 processor cycles 274 | 7,993,456 bytes consed 275 | ``` 276 | 277 | #### Hunchentoot 278 | 279 | ```common-lisp 280 | (time 281 | (dotimes (i 100000) 282 | (hunchentoot:url-decode "foo%E3%81%82"))) 283 | ``` 284 | 285 | ``` 286 | Evaluation took: 287 | 0.089 seconds of real time 288 | 0.088946 seconds of total run time (0.087632 user, 0.001314 system) 289 | 100.00% CPU 290 | 265,341,714 processor cycles 291 | 17,611,968 bytes consed 292 | ``` 293 | 294 | #### do-urlencode 295 | 296 | ```common-lisp 297 | (time 298 | (dotimes (i 100000) 299 | (do-urlencode:urldecode "foo%E3%81%82"))) 300 | ``` 301 | 302 | ``` 303 | Evaluation took: 304 | 0.634 seconds of real time 305 | 0.637236 seconds of total run time (0.632224 user, 0.005012 system) 306 | [ Run times consist of 0.023 seconds GC time, and 0.615 seconds non-GC time. ] 307 | 100.47% CPU 308 | 1,897,304,959 processor cycles 309 | 153,606,064 bytes consed 310 | ``` 311 | 312 | ### URL encoding 313 | 314 | - URL-encoding a string 100,000 times. 315 | 316 | | QURI | Hunchentoot | do-urlencode | 317 | |--------|-------------|--------------| 318 | | 0.074s | 0.282s | 0.317s | 319 | 320 | QURI is **3.8 times faster** than Hunchentoot, and **4.2 times faster** than do-urlencode. 321 | 322 | #### QURI 323 | 324 | ```common-lisp 325 | (time 326 | (dotimes (i 100000) 327 | (quri:url-encode "fooあ"))) 328 | ``` 329 | 330 | ``` 331 | Evaluation took: 332 | 0.074 seconds of real time 333 | 0.074284 seconds of total run time (0.072908 user, 0.001376 system) 334 | 100.00% CPU 335 | 221,267,517 processor cycles 336 | 31,993,520 bytes consed 337 | ``` 338 | 339 | #### Hunchentoot 340 | 341 | ```common-lisp 342 | (time 343 | (dotimes (i 100000) 344 | (hunchentoot:url-encode "fooあ"))) 345 | ``` 346 | 347 | ``` 348 | Evaluation took: 349 | 0.282 seconds of real time 350 | 0.284922 seconds of total run time (0.280063 user, 0.004859 system) 351 | [ Run times consist of 0.034 seconds GC time, and 0.251 seconds non-GC time. ] 352 | 101.06% CPU 353 | 845,204,850 processor cycles 354 | 214,382,672 bytes consed 355 | ``` 356 | 357 | #### do-urlencode 358 | 359 | ```common-lisp 360 | (time 361 | (dotimes (i 100000) 362 | (do-urlencode:urlencode "fooあ"))) 363 | ``` 364 | 365 | ``` 366 | Evaluation took: 367 | 0.317 seconds of real time 368 | 0.319419 seconds of total run time (0.314339 user, 0.005080 system) 369 | [ Run times consist of 0.026 seconds GC time, and 0.294 seconds non-GC time. ] 370 | 100.63% CPU 371 | 946,704,912 processor cycles 372 | 219,186,768 bytes consed 373 | ``` 374 | 375 | ## Change log 376 | 377 | ### 0.7.0 378 | 379 | - Add `:lenient` option `uri-query-params` (default to `T`). 380 | 381 | - Fix `merge-uris` to accept strings as it did in 0.4.0. 382 | 383 | - Support MSVC on ECL. 384 | 385 | - Coerce URI `path` to strings. 386 | 387 | ### 0.6.0 388 | 389 | - All constructors like `make-uri-file` and `make-uri-https` exported. 390 | 391 | - `uri=` and `uri-equal` normalize the path so that NIL and "" are considered equal. 392 | 393 | - The `file` scheme renders the query and the fragment. 394 | 395 | ### 0.5.0 396 | 397 | - URI schemes are now read-only. 398 | 399 | This preserves the integrity of the structures (or else the scheme of a 400 | `uri-http` could be set to FTP). 401 | 402 | `merge-uris` has been updated accordingly, so now the following returns the 403 | right thing: 404 | 405 | ```lisp 406 | (merge-uris (uri "/") (uri "https://en.wikipedia.org/wiki/URL")) 407 | ; => # 408 | ``` 409 | 410 | - Prevent some functions from being affected by *PRINT-BASE*. 411 | 412 | Functions `make-uri` and `uri-authority` build strings from a number; they now 413 | do so with the standard value for `*print-base*`. 414 | 415 | ### 0.4.0 416 | 417 | - Query values accept numbers again. 418 | This should fix backward-compatibility issues. 419 | 420 | - New `uri-equal` which normalizes the path when comparing URIs. 421 | 422 | - The empty path and the root path are no longer equal with `uri=`. Use 423 | `uri-equal` if you want the old behaviour. 424 | 425 | - Dot segments are removed when merging URLs. 426 | 427 | - Fix parsing of the colon at the end of the scheme. 428 | 429 | ### 0.3.0 430 | 431 | - Handle strings and byte vectors in query values, and nothing else. 432 | 433 | In particular, numbers are no longer supported. You'll have to convert them 434 | to a string or a byte-vector from the caller. 435 | 436 | - `parse-uri-string` and `parse-uri-byte-vector` now return the scheme default 437 | port when unspecified. 438 | 439 | ## Authors and maintainers 440 | 441 | * Eitaro Fukamachi (e.arrows@gmail.com): author 442 | * André A. Gomes (andremegafone@gmail.com): maintainer 443 | 444 | ## Copyright 445 | 446 | Copyright (c) 2014-2019 Eitaro Fukamachi (e.arrows@gmail.com) 447 | 448 | ## License 449 | 450 | Licensed under the BSD 3-Clause License. 451 | -------------------------------------------------------------------------------- /src/parser.lisp: -------------------------------------------------------------------------------- 1 | (in-package :cl-user) 2 | (defpackage quri.parser 3 | (:use :cl 4 | :quri.error 5 | :quri.port 6 | :quri.util) 7 | #+(or sbcl openmcl cmu allegro) 8 | (:import-from #+sbcl :sb-cltl2 9 | #+openmcl :ccl 10 | #+cmu :ext 11 | #+allegro :sys 12 | :variable-information) 13 | (:import-from :alexandria 14 | :with-gensyms 15 | :define-constant) 16 | (:export :parse-uri 17 | :parse-scheme 18 | :parse-authority 19 | :parse-path 20 | :parse-query 21 | :parse-fragment)) 22 | (in-package :quri.parser) 23 | 24 | (declaim (type (simple-array fixnum (128)) +uri-char+)) 25 | (define-constant +uri-char+ 26 | (let ((uri-char (make-array 128 :element-type 'fixnum :initial-element 0))) 27 | (dotimes (i 128 uri-char) 28 | (let ((char (code-char i))) 29 | (when (or (alphanumericp char) 30 | (char= char #\%) 31 | (char= char #\:) 32 | (char= char #\@) 33 | (char= char #\-) 34 | (char= char #\.) 35 | (char= char #\_) 36 | (char= char #\~) 37 | (char= char #\!) 38 | (char= char #\$) 39 | (char= char #\&) 40 | (char= char #\') 41 | (char= char #\() 42 | (char= char #\)) 43 | (char= char #\*) 44 | (char= char #\+) 45 | (char= char #\,) 46 | (char= char #\;) 47 | (char= char #\=)) 48 | (setf (aref uri-char i) 1))))) 49 | :test 'equalp) 50 | 51 | #+(or sbcl openmcl cmu allegro) 52 | (define-compiler-macro parse-uri (&whole form &environment env data &key start end) 53 | (declare (ignore start end)) 54 | (let ((type (cond 55 | ((constantp data) (type-of data)) 56 | ((symbolp data) (cdr (assoc 'type (nth-value 2 (variable-information data env)))))))) 57 | (cond 58 | ((null type) form) 59 | ((subtypep type 'simple-string) `(parse-uri-string ,@(cdr form))) 60 | ((subtypep type 'simple-byte-vector) `(parse-uri-byte-vector ,@(cdr form))) 61 | (t form)))) 62 | 63 | (defun parse-uri (data &key (start 0) end) 64 | "Parse a URI string or a URI byte vector and return 7 URI components: 65 | - scheme, 66 | - userinfo, 67 | - host name, 68 | - port, 69 | - path, 70 | - query, 71 | - fragment." 72 | (etypecase data 73 | (simple-string (parse-uri-string data :start start :end end)) 74 | (simple-byte-vector (parse-uri-byte-vector data :start start :end end)) 75 | (string (parse-uri (coerce data 'simple-string) :start start :end end)))) 76 | 77 | (defun parse-uri-string (data &key (start 0) end) 78 | (declare (type simple-string data) 79 | (optimize (speed 3) (safety 2))) 80 | (let (scheme userinfo host port path query fragment 81 | (parse-start start) 82 | (parse-end (or end (length data)))) 83 | (declare (type fixnum parse-start parse-end)) 84 | (block nil 85 | (flet ((parse-from-path (data start) 86 | (declare (type simple-string data) 87 | (type fixnum start)) 88 | (multiple-value-bind (data start end) 89 | (parse-path-string data :start start :end parse-end) 90 | (declare (type simple-string data) 91 | (type fixnum start end)) 92 | (unless (= start end) 93 | (setq path (subseq data start end))) 94 | ;; Pitfall: There may be no query but a fragment that has a '?', e.g. 95 | ;; https://example.org/#/?b 96 | (let ((maybe-query-start (or (nth-value 1 (parse-query-string data :start end :end parse-end)) 97 | (1+ parse-end))) 98 | (maybe-fragment-start (or (nth-value 1 (parse-fragment-string data :start end :end parse-end)) 99 | (1+ parse-end)))) 100 | (flet ((parse-fragment (path-end) 101 | (multiple-value-bind (data start end) 102 | (parse-fragment-string data :start (or path-end end) :end parse-end) 103 | (when data 104 | (setq fragment (subseq (the string data) (the fixnum start) (the fixnum end))))))) 105 | (if (< (the fixnum maybe-query-start) (the fixnum maybe-fragment-start)) 106 | (multiple-value-bind (parsed-data path-start path-end) 107 | (parse-query-string data :start end :end parse-end) 108 | (when parsed-data 109 | (setq query (subseq (the string parsed-data) (the fixnum path-start) (the fixnum path-end)))) 110 | (parse-fragment path-end)) 111 | (parse-fragment end))))))) 112 | (multiple-value-bind (parsed-data start end got-scheme) 113 | (parse-scheme-string data :start parse-start :end parse-end) 114 | (if parsed-data 115 | (locally (declare (type fixnum start end)) 116 | (setq scheme 117 | (or got-scheme 118 | (string-downcase (subseq data start end)))) 119 | (incf end)) ;eat the trailing #\: 120 | (setq scheme nil 121 | end parse-start)) 122 | (locally (declare (type fixnum end)) 123 | (unless (= end parse-end) 124 | (multiple-value-bind (parsed-data userinfo-start userinfo-end 125 | host-start host-end port-start port-end non-ascii) 126 | (parse-authority-string data :start end :end parse-end) 127 | (when parsed-data 128 | (locally (declare (type fixnum host-start host-end)) 129 | (when userinfo-start 130 | (setq userinfo (subseq (the string data) (the fixnum userinfo-start) (the fixnum userinfo-end)))) 131 | (unless (= host-start host-end) 132 | (setq host (subseq data host-start host-end)) 133 | (when non-ascii 134 | (setq host (idna:to-ascii host)))) 135 | (cond 136 | (port-start 137 | (locally (declare (type fixnum port-start port-end)) 138 | (unless (= port-start port-end) 139 | (handler-case 140 | (setq port 141 | (parse-integer data :start (the fixnum port-start) :end (the fixnum port-end))) 142 | (error () 143 | (error 'uri-invalid-port 144 | :data data :position port-start)))))) 145 | (scheme 146 | (setq port (scheme-default-port scheme)))))) 147 | (locally (declare (optimize (safety 0))) 148 | (parse-from-path data (or port-end host-end end))))))))) 149 | (values scheme userinfo host port path query fragment))) 150 | 151 | (defun parse-uri-byte-vector (data &key (start 0) end) 152 | (declare (type simple-byte-vector data) 153 | (optimize (speed 3) (safety 2))) 154 | (let (scheme userinfo host port path query fragment 155 | (parse-start start) 156 | (parse-end (or end (length data)))) 157 | (declare (type fixnum parse-start parse-end)) 158 | (flet ((subseq* (data &optional (start 0) end) 159 | (declare (type simple-byte-vector data)) 160 | (values (babel:octets-to-string data :start start :end end))) 161 | (parse-integer-from-bv (data &key (start 0) end) 162 | (declare (type fixnum start end) 163 | (optimize (speed 3) (safety 2))) 164 | (when (= start end) 165 | (return-from parse-integer-from-bv nil)) 166 | (do ((i start (1+ i)) 167 | (res 0)) 168 | ((= i end) res) 169 | (declare (type fixnum i res)) 170 | (let ((code (aref data i))) 171 | (declare (type fixnum code) 172 | #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)) 173 | (unless (<= #.(char-code #\0) code #.(char-code #\9)) 174 | (error 'uri-invalid-port 175 | :data data :position i)) 176 | 177 | (setq res (+ (* res 10) 178 | (- code #.(char-code #\0)))))))) 179 | (block nil 180 | (flet ((parse-from-path (data start) 181 | (declare (type simple-byte-vector data) 182 | (type fixnum start)) 183 | (multiple-value-bind (data start end) 184 | (parse-path-byte-vector data :start start :end parse-end) 185 | (declare (type fixnum start end)) 186 | (unless (= start end) 187 | (setq path (subseq* data start end))) 188 | (multiple-value-bind (parsed-data path-start path-end) 189 | (parse-query-byte-vector data :start end :end parse-end) 190 | (when parsed-data 191 | (setq query (subseq* parsed-data (the fixnum path-start) (the fixnum path-end)))) 192 | (multiple-value-bind (data start end) 193 | (parse-fragment-byte-vector data :start (or path-end end) :end parse-end) 194 | (when data 195 | (setq fragment (subseq* data (the fixnum start) (the fixnum end))))))))) 196 | (multiple-value-bind (parsed-data start end got-scheme) 197 | (parse-scheme-byte-vector data :start parse-start :end parse-end) 198 | (if parsed-data 199 | (locally (declare (type fixnum start end)) 200 | (setq scheme 201 | (or got-scheme 202 | (let ((data-str (make-string (- end start)))) 203 | (do ((i start (1+ i)) 204 | (j 0 (1+ j))) 205 | ((= i end) data-str) 206 | (let ((code (aref data i))) 207 | (setf (aref data-str j) 208 | (code-char 209 | (if (<= #.(char-code #\A) code #.(char-code #\Z)) 210 | (+ code 32) 211 | code)))))))) 212 | (incf end)) ;eat the trailing #\: 213 | (setq scheme nil 214 | end parse-start)) 215 | (locally (declare (type fixnum end)) 216 | (unless (= end parse-end) 217 | (multiple-value-bind (parsed-data userinfo-start userinfo-end 218 | host-start host-end port-start port-end non-ascii) 219 | (parse-authority-byte-vector data :start end :end parse-end) 220 | (when parsed-data 221 | (locally (declare (type simple-byte-vector data) 222 | (type fixnum host-start host-end)) 223 | (when userinfo-start 224 | (setq userinfo (subseq* data (the fixnum userinfo-start) (the fixnum userinfo-end)))) 225 | (unless (= host-start host-end) 226 | (setq host (subseq* data host-start host-end)) 227 | (when non-ascii 228 | (setq host (idna:to-ascii host)))) 229 | (cond 230 | (port-start 231 | (setq port 232 | (parse-integer-from-bv data :start port-start :end port-end))) 233 | (scheme 234 | (setq port (scheme-default-port scheme)))))) 235 | (locally (declare (optimize (safety 0))) 236 | (parse-from-path data (or port-end host-end (1+ end))))))))))) 237 | (values scheme userinfo host port path query fragment))) 238 | 239 | (defmacro defun-with-array-parsing (name (char p data start end &rest other-args) &body body) 240 | (with-gensyms (args type form env) 241 | (flet ((intern-proper-case (a b) 242 | (intern (format nil "~:@(~a-~a~)" a b)))) 243 | (let ((fn-for-string (intern-proper-case name :string)) 244 | (fn-for-byte-vector (intern-proper-case name :byte-vector))) 245 | `(progn 246 | (defun ,name (,data &rest ,args &key ,start ,end) 247 | (declare (ignore ,start ,end)) 248 | (etypecase ,data 249 | (simple-string (apply ',(intern-proper-case name :string) data ,args)) 250 | (simple-byte-vector (apply ',(intern-proper-case name :byte-vector) data ,args)))) 251 | 252 | #+(or sbcl openmcl cmu allegro) 253 | (define-compiler-macro ,name (&whole ,form &environment ,env ,data &rest ,args) 254 | (declare (ignore ,args)) 255 | (let ((,type (cond 256 | ((constantp ,data) (type-of ,data)) 257 | ((symbolp ,data) (cdr (assoc 'type (nth-value 2 (variable-information ,data ,env)))))))) 258 | (cond 259 | ((null ,type) ,form) 260 | ((subtypep ,type 'simple-string) `(,',fn-for-string ,@(cdr ,form))) 261 | ((subtypep ,type 'simple-byte-vector) `(,',fn-for-byte-vector ,@(cdr ,form))) 262 | (t ,form)))) 263 | 264 | (defun ,fn-for-string (,data &key (,start 0) (,end (length ,data)) ,@other-args) 265 | (declare (type simple-string ,data) 266 | (type fixnum ,start ,end) 267 | (optimize (speed 3) (safety 2))) 268 | (macrolet ((char=* (char1 char2) 269 | `(char= ,char1 ,char2)) 270 | (char-code* (char) 271 | `(char-code ,char)) 272 | (scheme-char-p* (char) 273 | `(scheme-char-p ,char)) 274 | (standard-alpha-char-p* (char) 275 | `(standard-alpha-char-p ,char))) 276 | (block ,name 277 | (with-string-parsing (,char ,p ,data ,start ,end) 278 | (declare (type fixnum ,p)) 279 | ,@body)))) 280 | 281 | (defun ,fn-for-byte-vector (,data &key (,start 0) (,end (length ,data)) ,@other-args) 282 | (declare (type simple-byte-vector ,data) 283 | (type fixnum ,start ,end) 284 | (optimize (speed 3) (safety 2))) 285 | (macrolet ((char=* (byte char) 286 | `(= ,byte ,(char-code char))) 287 | (char-code* (byte) 288 | byte) 289 | (scheme-char-p* (byte) 290 | `(scheme-byte-p ,byte)) 291 | (standard-alpha-char-p* (byte) 292 | `(standard-alpha-byte-p ,byte))) 293 | (block ,name 294 | (with-byte-array-parsing (,char ,p ,data ,start ,end) 295 | (declare (type fixnum ,p)) 296 | ,@body))))))))) 297 | 298 | (defun scheme-char-p (char) 299 | (declare (type character char) 300 | (optimize (speed 3) (safety 0))) 301 | (or (standard-alphanumeric-p char) 302 | (char= char #\+) 303 | (char= char #\-) 304 | (char= char #\.))) 305 | 306 | (defun scheme-byte-p (byte) 307 | (declare (type (unsigned-byte 8) byte) 308 | (optimize (speed 3) (safety 0))) 309 | (or (standard-alphanumeric-byte-p byte) 310 | (= byte (char-code #\+)) 311 | (= byte (char-code #\-)) 312 | (= byte (char-code #\.)))) 313 | 314 | (defun-with-array-parsing parse-scheme (char p data start end) 315 | (parsing-scheme-start 316 | (when (or (char=* char #\h) 317 | (char=* char #\H)) 318 | (goto parsing-H)) 319 | (unless (standard-alpha-char-p* char) 320 | (return-from parse-scheme nil)) 321 | (gonext)) 322 | 323 | (parsing-scheme 324 | (cond 325 | ((char=* char #\:) 326 | (return-from parse-scheme 327 | (values data start p))) 328 | ((scheme-char-p* char) 329 | (redo)) 330 | (t 331 | (return-from parse-scheme nil)))) 332 | 333 | (parsing-H 334 | (if (or (char=* char #\t) 335 | (char=* char #\T)) 336 | (goto parsing-HT) 337 | (goto parsing-scheme 0))) 338 | 339 | (parsing-HT 340 | (if (or (char=* char #\t) 341 | (char=* char #\T)) 342 | (goto parsing-HTT) 343 | (goto parsing-scheme 0))) 344 | 345 | (parsing-HTT 346 | (if (or (char=* char #\p) 347 | (char=* char #\P)) 348 | (goto parsing-HTTP) 349 | (goto parsing-scheme 0))) 350 | 351 | (parsing-HTTP 352 | (cond 353 | ((char=* char #\:) 354 | (return-from parse-scheme 355 | (values data start p "http"))) 356 | ((or (char=* char #\s) 357 | (char=* char #\S)) 358 | (goto parsing-HTTPS)) 359 | (t (goto parsing-scheme 0)))) 360 | 361 | (parsing-HTTPS 362 | (if (char=* char #\:) 363 | (return-from parse-scheme 364 | (values data start p "https")) 365 | (goto parsing-scheme 0))) 366 | 367 | (:eof (return-from parse-scheme nil))) 368 | 369 | (defun-with-array-parsing parse-authority (char p data start end 370 | &aux 371 | (authority-mark nil) 372 | (colon-mark nil) 373 | userinfo-start 374 | userinfo-end 375 | host-start 376 | host-end 377 | port-start 378 | port-end 379 | non-ascii) 380 | (parsing-first 381 | (cond 382 | ((char=* char #\/) 383 | (gonext)) 384 | (t 385 | (return-from parse-authority 386 | (values data nil nil start start nil nil non-ascii))))) 387 | 388 | (parsing-authority-starting 389 | (unless (char=* char #\/) 390 | (return-from parse-authority 391 | (values data nil nil start start nil nil non-ascii))) 392 | (setq authority-mark (1+ p)) 393 | (gonext)) 394 | 395 | (parsing-authority-start 396 | (if (char=* char #\[) 397 | (goto parsing-ipliteral) 398 | (gonext 0))) 399 | 400 | ;; parsing host or userinfo 401 | (parsing-authority 402 | (cond 403 | ((char=* char #\:) 404 | (setq colon-mark p) 405 | (redo)) 406 | ((char=* char #\@) 407 | (when userinfo-start 408 | (error 'uri-malformed-string :data data :position p)) 409 | (setq userinfo-start authority-mark 410 | userinfo-end p) 411 | (setq authority-mark (1+ p) 412 | colon-mark nil) 413 | (redo)) 414 | ((or (char=* char #\/) 415 | (char=* char #\?) 416 | (char=* char #\#)) 417 | (go :eof)) 418 | ((let ((code (char-code* char))) 419 | (and (<= 0 code 127) (= (aref +uri-char+ code) 1))) 420 | (redo)) 421 | ((< 127 (char-code* char)) 422 | (setq non-ascii t) 423 | (redo)) 424 | (t (error 'uri-malformed-string 425 | :data data :position p)))) 426 | 427 | (parsing-ipliteral 428 | (if (char=* char #\]) 429 | (goto parsing-authority) 430 | (redo))) 431 | 432 | (:eof 433 | (unless authority-mark 434 | (return-from parse-authority 435 | (values data 436 | nil nil 437 | start start 438 | nil nil non-ascii))) 439 | (if colon-mark 440 | (setq host-start authority-mark 441 | host-end colon-mark 442 | port-start (1+ colon-mark) 443 | port-end p) 444 | (setq host-start authority-mark 445 | host-end p)) 446 | (return-from parse-authority 447 | (values data 448 | userinfo-start userinfo-end 449 | host-start host-end 450 | port-start port-end non-ascii)))) 451 | 452 | (defun path-char-p (char) 453 | (declare (type character char) 454 | (optimize (speed 3) (safety 0))) 455 | (let ((byte (char-code char))) 456 | (and (< byte 128) 457 | (or (= (aref +uri-char+ byte) 1) 458 | (= byte #.(char-code #\/)))))) 459 | 460 | (defun path-byte-p (byte) 461 | (declare (type (unsigned-byte 8) byte) 462 | (optimize (speed 3) (safety 0))) 463 | (or (= (aref +uri-char+ byte) 1) 464 | (= byte (char-code #\/)))) 465 | 466 | (defun query-char-p (char) 467 | (declare (type character char) 468 | (optimize (speed 3) (safety 0))) 469 | (or (path-char-p char) 470 | (char= char #\?))) 471 | 472 | (defun query-byte-p (byte) 473 | (declare (type (unsigned-byte 8) byte) 474 | (optimize (speed 3) (safety 0))) 475 | (or (path-byte-p byte) 476 | (= byte (char-code #\?)))) 477 | 478 | (defmacro parse-until-string (delimiters data &key start end test) 479 | (with-gensyms (p char) 480 | `(block nil 481 | (progn 482 | (do ((,p ,start (1+ ,p))) 483 | ((= ,p ,end) 484 | (values ,data ,start ,end)) 485 | (declare (type fixnum ,p)) 486 | (let ((,char (aref ,data ,p))) 487 | (declare (type character ,char)) 488 | (when (or ,@(loop for delim in delimiters 489 | collect `(char= ,delim ,char))) 490 | (return (values ,data ,start ,p))) 491 | ,@(when test 492 | `((unless (funcall ,test ,char) 493 | (error 'uri-malformed-string 494 | :data ,data :position ,p)))))))))) 495 | 496 | (defmacro parse-until-byte-vector (delimiters data &key start end test) 497 | (with-gensyms (p byte) 498 | `(block nil 499 | (progn 500 | (do ((,p ,start (1+ ,p))) 501 | ((= ,p ,end) 502 | (values ,data ,start ,end)) 503 | (declare (type fixnum ,p)) 504 | (let ((,byte (aref ,data ,p))) 505 | (declare (type (unsigned-byte 8) ,byte)) 506 | (when (or ,@(loop for delim in delimiters 507 | collect `(= ,(char-code delim) ,byte))) 508 | (return (values ,data ,start ,p))) 509 | ,@(when test 510 | `((unless (funcall ,test ,byte) 511 | (error 'uri-malformed-string 512 | :data ,data :position ,p)))))))))) 513 | 514 | (defun parse-path (data &key (start 0) (end (length data))) 515 | (etypecase data 516 | (simple-string 517 | (parse-path-string data :start start :end end)) 518 | (simple-byte-vector 519 | (parse-path-byte-vector data :start start :end end)))) 520 | 521 | (defun parse-path-string (data &key (start 0) (end (length data))) 522 | (declare (type simple-string data) 523 | (optimize (speed 3) (safety 2)) 524 | #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)) 525 | (parse-until-string (#\? #\#) data :start start :end end)) 526 | 527 | (defun parse-path-byte-vector (data &key (start 0) (end (length data))) 528 | (declare (type simple-byte-vector data) 529 | (optimize (speed 3) (safety 2)) 530 | #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)) 531 | (parse-until-byte-vector (#\? #\#) data :start start :end end)) 532 | 533 | (defun parse-query (data &key (start 0) (end (length data))) 534 | (etypecase data 535 | (string 536 | (parse-query-string data :start start :end end)) 537 | (simple-byte-vector 538 | (parse-query-byte-vector data :start start :end end)))) 539 | 540 | #+(or sbcl openmcl cmu allegro) 541 | (define-compiler-macro parse-query (&whole form &environment env data &key start end) 542 | (declare (ignore start end)) 543 | (let ((type (cond 544 | ((constantp data) (type-of data)) 545 | ((symbolp data) (cdr (assoc 'type (nth-value 2 (variable-information data env)))))))) 546 | (cond 547 | ((null type) form) 548 | ((subtypep type 'simple-string) `(parse-query-string ,@(cdr form))) 549 | ((subtypep type 'simple-byte-vector) `(parse-query-byte-vector ,@(cdr form))) 550 | (t form)))) 551 | 552 | (defun parse-query-string (data &key (start 0) (end (length data))) 553 | (declare (type simple-string data) 554 | (type fixnum start end) 555 | (optimize (speed 3) (safety 2))) 556 | (let ((?-pos (position #\? data :start start :end end))) 557 | (when ?-pos 558 | (parse-until-string (#\#) data :start (1+ (the fixnum ?-pos)) :end end)))) 559 | 560 | (defun parse-query-byte-vector (data &key (start 0) (end (length data))) 561 | (declare (type simple-byte-vector data) 562 | (type fixnum start end) 563 | (optimize (speed 3) (safety 2))) 564 | (let ((?-pos (position #.(char-code #\?) data :start start :end end))) 565 | (when ?-pos 566 | (parse-until-byte-vector (#\#) data :start (1+ (the fixnum ?-pos)) :end end)))) 567 | 568 | (defun parse-fragment (data &key (start 0) (end (length data))) 569 | (etypecase data 570 | (string (parse-fragment-string data :start start :end end)) 571 | (simple-byte-vector (parse-fragment-byte-vector data :start start :end end)))) 572 | 573 | #+(or sbcl openmcl cmu allegro) 574 | (define-compiler-macro parse-fragment (&whole form &environment env data &key start end) 575 | (declare (ignore start end)) 576 | (let ((type (cond 577 | ((constantp data) (type-of data)) 578 | ((symbolp data) (cdr (assoc 'type (nth-value 2 (variable-information data env)))))))) 579 | (cond 580 | ((null type) form) 581 | ((subtypep type 'simple-string) `(parse-fragment-string ,@(cdr form))) 582 | ((subtypep type 'simple-byte-vector) `(parse-fragment-byte-vector ,@(cdr form))) 583 | (t form)))) 584 | 585 | (defun parse-fragment-string (data &key (start 0) (end (length data))) 586 | (declare (type simple-string data) 587 | (type fixnum start end) 588 | (optimize (speed 3) (safety 2))) 589 | (let ((|#-pos| (position #\# data 590 | :start start 591 | :end end))) 592 | (when |#-pos| 593 | (values data (1+ (the fixnum |#-pos|)) end)))) 594 | 595 | (defun parse-fragment-byte-vector (data &key (start 0) (end (length data))) 596 | (declare (type simple-byte-vector data) 597 | (type fixnum start end) 598 | (optimize (speed 3) (safety 2))) 599 | (let ((|#-pos| (position #\# data 600 | :start start 601 | :end end 602 | :key #'code-char))) 603 | (when |#-pos| 604 | (values data (1+ (the fixnum |#-pos|)) end)))) 605 | --------------------------------------------------------------------------------