├── .gitignore ├── README.md ├── cl-oneliner.asd ├── cl-oneliner.lisp ├── package.lisp ├── test ├── oneliner.lisp └── utils.lisp └── utils.lisp /.gitignore: -------------------------------------------------------------------------------- 1 | \#*# 2 | *~ 3 | .DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | One-liner 2 | ============== 3 | 4 | Extracts a one-liner from a piece of text. 5 | 6 | ### How? 7 | 8 | 1. Counts word occurences 9 | 2. Given each word, count word occurences immediately after this word 10 | 3. Construct sentence by taking 1 and 2 recursively 11 | 4. Avoids repeating a word to avoid infinite loops 12 | 5. Removes all punctuation marks and convert to lower-case 13 | 14 | ### Why? 15 | 16 | Why not. Are you not curious about the results? 17 | 18 | ### Example 19 | 20 | ``` 21 | (ql:quickload 'cl-oneliner) 22 | (cl-oneliner:oneliner "whatever wall of text you are too lazy too read") 23 | ``` 24 | 25 | In the functional fashion of recursion, when given this README as an example, it produces the following: 26 | 27 | ``` 28 | "word occurences given each" 29 | ``` 30 | 31 | Isn't that just poetic? 32 | 33 | *Idea sparked by @flockonus* 34 | -------------------------------------------------------------------------------- /cl-oneliner.asd: -------------------------------------------------------------------------------- 1 | ;;;; cl-oneliner.asd 2 | 3 | (asdf:defsystem #:cl-oneliner 4 | :serial t 5 | :description "Given a piece of text, summarize it with a one-liner" 6 | :author "mck-" 7 | :license "wtfpl" 8 | :version "0.1.0" 9 | :depends-on (#:lisp-unit #:split-sequence #:cl-ppcre) 10 | :components ((:file "package") 11 | (:file "utils") 12 | (:file "cl-oneliner") 13 | (:module :test 14 | :components 15 | ((:file "utils"))))) 16 | -------------------------------------------------------------------------------- /cl-oneliner.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Oneliner takes a piece of text (article) and extracts a oneliner 2 | ;;;; 1. Take the most frequent word 3 | ;;;; 2. Take the most frequent next word 4 | ;;;; 3. Repeat until no more words come next (avoid repetition of words) 5 | 6 | (in-package #:cl-oneliner) 7 | 8 | (defun oneliner (string) 9 | "Given a string, summarize the piece of text in a oneliner" 10 | (let ((alist (count-words string))) 11 | (labels ((iter (alist word oneliner) 12 | "One iteration will take the word from alist and append to the sentence, while recursively taking the most frequent next word that occurs after" 13 | (if (or (null alist) (null word)) 14 | (string-trim '(#\Space) oneliner) 15 | (iter (remove-word alist word) 16 | (get-next-word alist word) 17 | (concatenate 'string oneliner word " "))))) 18 | (iter alist (car (alist-most-frequent alist)) "")))) 19 | 20 | (defun get-next-word (alist word) 21 | (alist-most-frequent-next (assoc word alist :test #'equal))) 22 | 23 | (defun remove-word (alist word) 24 | "Given an alist-count and current word, return alist without word -- and make sure they are also removed from all next counts" 25 | (cond ((null alist) nil) 26 | ((string= (caar alist) word) 27 | (remove-word (cdr alist) word)) 28 | (t 29 | (cons (list (caar alist) 30 | (cons 'NEXT (remove word (cdadar alist) :test #'equal :key #'car)) 31 | (caddar alist)) 32 | (remove-word (cdr alist) word))))) 33 | -------------------------------------------------------------------------------- /package.lisp: -------------------------------------------------------------------------------- 1 | ;;;; package.lisp 2 | 3 | (defpackage #:cl-oneliner 4 | (:use #:cl #:lisp-unit #:split-sequence #:cl-ppcre) 5 | (:export :oneliner)) 6 | -------------------------------------------------------------------------------- /test/oneliner.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Unit tests for oneliner algorithm 2 | 3 | (in-package #:cl-oneliner) 4 | 5 | (define-test short-sentence 6 | "Test the algorithm on short sentence" 7 | (:tag :oneliner) 8 | (assert-equal "" (oneliner "")) 9 | (assert-equal "hello world" (oneliner "hello world hello")) 10 | (assert-equal "this test and not" (oneliner "this test is this test and not this one, this test and!"))) 11 | -------------------------------------------------------------------------------- /test/utils.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Unit tests for utilities 2 | 3 | (in-package #:cl-oneliner) 4 | 5 | (define-test alist-count 6 | "Test the alist-count util" 7 | (:tag :util) 8 | (assert-equal 1 (get-count "this" (alist-count "this is a test"))) 9 | (assert-equal 2 (get-count "this" (alist-count "this is this"))) 10 | (assert-equal 3 (get-count "this" (alist-count "not this but this is this"))) 11 | (assert-equal 4 (get-count "this" (alist-count "this THIS This this"))) 12 | (assert-equal 5 (get-count "this" (alist-count "this, THIS This. this; THIS!")))) 13 | 14 | (define-test alist-highest-occurence 15 | "Test return of alist-most-frequent, to include next counts too" 16 | (:tag :util) 17 | (assert-equal '(("test" . 1)("is" . 1)) 18 | (val (list (alist-most-frequent (count-words "this is this test"))) 19 | "this" 'next)) 20 | (assert-equal 2 (val (list (alist-most-frequent (count-words "Is this is this test is?"))) 21 | "is" 'next "this")) 22 | (assert-equal '(("testing" . 1)) 23 | (val (list (alist-most-frequent (count-words "TEST test, testing; test!"))) 24 | "test" 'next))) 25 | 26 | (define-test next-counts 27 | "Test counting of next words, given a word and a string" 28 | (:tag :util) 29 | (assert-equal 2 (aval "is" (next-counts "this" "this is a test and this is another test"))) 30 | (assert-equal 1 (aval "a" (next-counts "is" "this is a test and this is another test"))) 31 | (assert-equal '(("and" . 1)) (next-counts "test" "this is a test and this is another test")) 32 | (assert-equal 1 (aval "and" (next-counts "test" "this is a test and this is another test")))) 33 | 34 | (define-test most-frequent-next 35 | "Test returning most frequent word coming after" 36 | (:tag :util) 37 | (assert-equal nil (alist-most-frequent-next '("this" (NEXT) (COUNT . 1)))) 38 | (assert-equal "this" (alist-most-frequent-next 39 | (alist-most-frequent 40 | (count-words "Is this is this test is?")))) 41 | (assert-equal "bar" (alist-most-frequent-next 42 | (alist-most-frequent 43 | (count-words "foo bar foo foo bar baz foo baz?"))))) 44 | -------------------------------------------------------------------------------- /utils.lisp: -------------------------------------------------------------------------------- 1 | ;;;; utils.lisp 2 | 3 | (in-package #:cl-oneliner) 4 | 5 | ;;; Custom data structure for alist count 6 | ;;; An Alist where the value is another Alist, which holds count and next 7 | ;;; ----------------- 8 | ;;; ((wordA (count . 3) 9 | ;;; (next (wordB . 3))) 10 | ;;; (wordB (count . 5) 11 | ;;; (next (wordC . 2) 12 | ;;; (wordD . 3)))) 13 | ;;; ----------------- 14 | ;;; This example shows a hashcount of words, and the word occurance of the words after the word 15 | ;;; - wordA occured 3 times, always followed by wordB 16 | ;;; - wordB occured 5 times, two times followed by wordC and three times by wordD 17 | 18 | ;;; Creating the alist-count 19 | 20 | (defun count-words (string) 21 | "Given a string, return an alist which counts the words and the next words that come after it" 22 | (alist-next-count string (alist-count string))) 23 | 24 | (defun alist-count (string) 25 | "Given a string, return an Alist counting the words" 26 | (let ((words (words-sorted string))) 27 | (labels ((iter (words alist &optional (count 1)) 28 | (if (null words) alist 29 | (let ((word (car words)) 30 | (next (cadr words))) 31 | (if (string= word next) 32 | (iter (cdr words) alist (1+ count)) 33 | (iter (cdr words) (acons word `((count . ,count)) alist))))))) 34 | (iter words '())))) 35 | 36 | (defun alist-next-count (string alist) 37 | "Given the original string and the alist resulting from alist-count, count next words" 38 | (labels ((iter (alist ans) 39 | (if (null alist) ans 40 | (iter (cdr alist) 41 | (cons (list (caar alist) 42 | `(next ,@(next-counts (caar alist) string)) 43 | (cadar alist)) 44 | ans))))) 45 | (iter alist '()))) 46 | 47 | (defun next-counts (word string) 48 | "Given a word and the string, return list of (word . count)" 49 | (let ((words (split-words string))) 50 | (labels ((iter (words alist) 51 | (if (null words) alist 52 | (let* ((cur (car words)) 53 | (next (cadr words)) 54 | (freq (aval next alist))) 55 | (if (and next (string= cur word) (not (string= next word))) 56 | (iter (cdr words) (acons next (if freq (1+ freq) 1) alist)) 57 | (iter (cdr words) alist)))))) 58 | (iter words '())))) 59 | 60 | (defun simplify-word (word) 61 | "Given a word, put it to lower-case and remove all symbols" 62 | (string-downcase (regex-replace-all "[!-@]" word ""))) 63 | 64 | (defun split-words (sentence) 65 | (mapcar #'simplify-word 66 | (split-sequence #\Space sentence))) 67 | 68 | (defun words-sorted (sentence) 69 | (sort (split-words sentence) #'string<)) 70 | 71 | ;;; Reader methods for alist-count 72 | 73 | (defun aval (key alist) 74 | "Given alist and key, return value" 75 | (cdr (assoc key alist :test #'equal))) 76 | 77 | (defmacro val-reversed (alist &rest keys) 78 | "Given an alist, and a list of keys, retrieve value dot-notation style (reversed)" 79 | (if (null keys) alist 80 | `(aval ,(car keys) (val-reversed ,alist ,@(cdr keys))))) 81 | 82 | (defmacro val (alist &rest keys) 83 | "Given an alist, and a list of keys, retrieve value dot-notation style" 84 | `(val-reversed ,alist ,@(reverse keys))) 85 | 86 | (defun get-count (word alist-count) 87 | "Given a word and an alist-count, return number of occurence" 88 | (cdar (aval word alist-count))) 89 | 90 | (defun alist-most-frequent (alist-count) 91 | "Given an alist-count, return most frequent item" 92 | (reduce (lambda (x y) (if (> (aval 'count (cdr x)) (aval 'count (cdr y))) x y )) alist-count)) 93 | 94 | (defun alist-most-frequent-next (most-frequent) 95 | "Given an object (from alist-most-frequent), return the word that occurs most frequently as next word" 96 | (let ((next-words (val (cdr most-frequent) 'next))) 97 | (when next-words 98 | (car (reduce (lambda (x y) (if (> (cdr x) (cdr y)) x y)) 99 | next-words))))) 100 | --------------------------------------------------------------------------------