├── .gitignore ├── .lein-env.example ├── LICENSE ├── README.md ├── d ├── dev ├── transactor.example.properties └── user.clj ├── doc └── intro.md ├── project.clj ├── resources ├── genres.edn ├── public │ └── .gitkeep ├── sample.edn ├── schema.edn └── templates │ └── .gitkeep ├── src └── kevin │ ├── core.clj │ ├── expunge.clj │ ├── handler.clj │ ├── loader.clj │ ├── routes │ └── home.clj │ ├── search.clj │ ├── system.clj │ ├── util.clj │ └── views.clj └── test └── kevin ├── core_test.clj ├── loader_test.clj ├── search_test.clj └── util_test.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | /resources/*.list.gz 11 | config/*.properties 12 | data 13 | /.bundle 14 | /build 15 | .sass-cache 16 | .cache 17 | .DS_Store 18 | .gem 19 | /source 20 | dev/transactor.properties 21 | 22 | # Ignore generated assets. Generate with ./d 23 | resources/public/images 24 | resources/public/stylesheets 25 | resources/public/javascripts 26 | resources/templates/*.html 27 | -------------------------------------------------------------------------------- /.lein-env.example: -------------------------------------------------------------------------------- 1 | {:datomic-db-url "datomic:free://localhost:4334/movies"} 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor tocontrol, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of Washington and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Kevin Bacon's favourite reposotory 2 | ================================== 3 | 4 | How far is an actor from Kevin Bacon? 5 | 6 | ## Setup 7 | 8 | 1. Make sure leiningen is installed 9 | 2. `brew install datomic` 10 | 3. Copy .lein-env.example to .lein-env 11 | 4. Copy dev/transactor.example.properties to dev/transactor.properties 12 | 5. In another pane, run `datomic-transactor $PWD/dev/transactor.properties` 13 | 6. Start a repl with `lein repl` 14 | 7. Within the repl, run `(go)` 15 | 16 | You can now visit http://localhost:3000 to see the web app. You also now have an 17 | empty datomic database. 18 | 19 | ## Import 20 | 21 | 1. At a REPL, after you've run `(go)`, run the following: 22 | 23 | (import-sample-data) 24 | -------------------------------------------------------------------------------- /d: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | template_dir="resources/templates/" 6 | branch="$(basename `git symbolic-ref HEAD`)" 7 | 8 | git co design 9 | middleman build 10 | git co $branch 11 | 12 | rm -Rf resources/public/* 13 | cp -R build/* resources/public 14 | 15 | mv resources/public/*.html $template_dir 16 | -------------------------------------------------------------------------------- /dev/transactor.example.properties: -------------------------------------------------------------------------------- 1 | ################################################################ 2 | protocol=free 3 | host=localhost 4 | port=4334 5 | 6 | ## OPTIONAL #################################################### 7 | # The dev: and free: protocols typically use three ports 8 | # starting with the selected :port, but you can specify the 9 | # other ports explicitly, e.g. for virtualization environs 10 | # that do not issue contiguous ports. 11 | 12 | # h2-port=4335 13 | # h2-web-port=4336 14 | 15 | ################################################################ 16 | # See http://docs.datomic.com/capacity.html 17 | 18 | # Recommended settings for -Xmx4g, ongoing usage. 19 | # memory-index-threshold=32m 20 | # memory-index-max=128m 21 | # object-cache-max=1g 22 | 23 | # Recommended settings for -Xmx4g import jobs. 24 | # memory-index-threshold=512m 25 | # memory-index-max=1g 26 | # object-cache-max=1g 27 | 28 | # Recommended settings for -Xmx1g usage, e.g. dev laptops. 29 | memory-index-threshold=32m 30 | memory-index-max=128m 31 | object-cache-max=128m 32 | 33 | 34 | ## OPTIONAL #################################################### 35 | 36 | # Set to false to disable SSL between the peers and the transactor. 37 | # Default: true 38 | # encrypt-channel=true 39 | 40 | # Data directory is used for dev: and free: storage, and 41 | # as a temporary directory for all storages. 42 | # data-dir=data 43 | 44 | # Transactor will log here, see bin/logback.xml to configure logging. 45 | # log-dir=log 46 | 47 | # Transactor will write process pid here on startup 48 | # pid-file=transactor.pid 49 | 50 | 51 | ## OPTIONAL #################################################### 52 | # See http://docs.datomic.com/capacity.html 53 | 54 | # Soft limit on the number of concurrent writes to storage. 55 | # Default: 4, Miniumum: 2 56 | # write-concurrency=4 57 | 58 | # Soft limit on the number of concurrent reads to storage. 59 | # Default: 2 times write-concurrency, Miniumum: 2 60 | # read-concurrency=8 61 | -------------------------------------------------------------------------------- /dev/user.clj: -------------------------------------------------------------------------------- 1 | (ns user 2 | (:require [clojure.java.io :as io] 3 | [clojure.string :as str] 4 | [clojure.pprint :refer (pprint)] 5 | [clojure.repl :refer :all] 6 | [clojure.zip :as zip] 7 | [clojure.tools.namespace.repl :refer (refresh refresh-all)] 8 | [ring.server.standalone :refer (serve)] 9 | [datomic.api :as d :refer (db q)] 10 | [kevin.system :as sys] 11 | [kevin.expunge] 12 | [kevin.core :refer :all] 13 | [kevin.search :refer :all])) 14 | 15 | (defonce system nil) 16 | 17 | (defn start-server [system] 18 | (let [server (serve (get-in system [:web :handler]) (:web system))] 19 | (assoc-in system [:web :server] server))) 20 | 21 | (defn stop-server [system] 22 | (when-let [server (get-in system [:web :server])] 23 | (.stop server) 24 | (assoc-in system [:web :server] nil))) 25 | 26 | (defn init 27 | "Constructs the current development system." 28 | [] 29 | (alter-var-root #'system (constantly (sys/system)))) 30 | 31 | (defn start 32 | "Starts the current development system." 33 | [] 34 | (alter-var-root #'system sys/start) 35 | (alter-var-root #'system start-server)) 36 | 37 | (defn stop 38 | "Shuts down and destroys the current development system." 39 | [] 40 | (when system 41 | (alter-var-root #'system stop-server) 42 | (alter-var-root #'system sys/stop))) 43 | 44 | (defn go 45 | "Initializes the current development system and starts it running." 46 | [] 47 | (init) 48 | (start)) 49 | 50 | (defn reset [] 51 | (stop) 52 | (refresh :after 'user/go)) 53 | 54 | (defn import-sample-data 55 | "Transacts the sample data from `resources/sample.edn` into current 56 | system's database connection. Assumes top-level system var has an active 57 | database connection." 58 | [] 59 | { :pre (:conn (:db system)) } 60 | (let [conn (-> system :db :conn) 61 | actors (read-string (slurp "resources/sample.edn")) 62 | tx-fn (fn [[name movies]] 63 | {:db/id (d/tempid :db.part/user) 64 | :person/name name 65 | :actor/movies (mapv (fn [m] {:db/id (d/tempid :db.part/user) 66 | :movie/title m}) movies)})] 67 | 68 | @(d/transact conn (map tx-fn actors)) 69 | (kevin.loader/add-years-to-movies conn) 70 | (d/request-index conn) 71 | :ok)) 72 | 73 | (comment 74 | 75 | (reset) 76 | 77 | ;; movies an actor was in 78 | (q '[:find [?mn ...] 79 | :in $ ?n 80 | :where 81 | [?e :person/name ?n] 82 | [?e :actor/movies ?m] 83 | [?m :movie/title ?mn]] 84 | (-> system :db :conn db) 85 | "Bacon, Kevin (I)") 86 | 87 | ;; movies an actor with name like query was in 88 | (q '[:find [(pull ?e [:person/name {:actor/movies [:movie/title]}]) ...] 89 | :in $ ?q 90 | :where 91 | [(fulltext $ :person/name ?q) [[?e ?name]]]] 92 | (-> system :db :conn db) 93 | "+Bacon +Kevin") 94 | 95 | ;; number of movies, total 96 | (time (q '[:find (count ?e) :where [?e :movie/title]] 97 | (-> system :db :conn db))) 98 | 99 | ;; number of movies with actors 100 | (time 101 | (let [d (-> system :db :conn db)] 102 | (q '[:find (count ?e) 103 | :where 104 | [?e :movie/title] 105 | [_ :actor/movies ?e]] 106 | d))) 107 | 108 | ;; number of movies with no actors 109 | (time 110 | (let [d (-> system :db :conn db) 111 | movies (q '[:find ?e :where [?e :movie/title]] d)] 112 | (->> (map (fn [[id]] (d/entity d id)) movies) 113 | (remove (fn [e] (:actor/_movies e))) 114 | count))) 115 | 116 | ;; retract video games 117 | (let [d (-> system :db :conn db)] 118 | (->> (q '[:find ?e ?name 119 | :where 120 | [?e :movie/title ?name]] d) 121 | (filter (fn [[e n]] (not= -1 (.indexOf n "(VG)")))) 122 | (mapv (fn [[e _]] [:db.fn/retractEntity e])) 123 | (d/transact (-> system :db :conn)) 124 | (deref) 125 | )) 126 | 127 | ;; zipper 128 | (let [d (-> system :db :conn db) 129 | a (d/entid d [:person/name "Barth, Clayton"]) 130 | actor-name (partial eid->actor-name d) 131 | kevin (d/entid d [:person/name "Bacon, Kevin (I)"]) 132 | tree (zipper d a)] 133 | (time (some (fn [n] (when (= kevin n) n)) tree))) 134 | 135 | 136 | ;; bi-directional bfs 137 | (def from-bfs 138 | (let [d (-> system :db :conn db) 139 | clay (d/entid d [:person/name "Barth, Clayton"]) 140 | kevin (d/entid d [:person/name "Bacon, Kevin (I)"]) 141 | neighbor-fn (partial neighbors d)] 142 | (time (bidirectional-bfs clay kevin neighbor-fn))) 143 | ) 144 | 145 | ;; queue-based search 146 | (let [d (-> system :db :conn db) 147 | clay (d/entid d [:person/name "Barth, Clayton"]) 148 | kevin (d/entid d [:person/name "Bacon, Kevin (I)"]) 149 | neighbor-fn (partial neighbors d) 150 | actor-name (partial actor-or-movie-name d)] 151 | (time (map actor-name (bfs clay kevin neighbor-fn)))) 152 | 153 | ;; history-graham? 154 | (time 155 | (let [d (-> system :db :conn db) 156 | d (d/filter d (without-documentaries d)) 157 | kevin (d/entid d [:person/name "Bacon, Kevin (I)"]) 158 | neighbor-fn (partial neighbors d)] 159 | (->> (degrees-of-separation kevin neighbor-fn :up-to 15) 160 | (reduce-kv (fn [m k v] 161 | (if (even? k) 162 | (assoc m (/ k 2) (count v)) 163 | m)) {}))) 164 | ) 165 | 166 | ;; query engine search (3 degrees) 167 | (let [d (-> system :db :conn db) 168 | clay (actor-name->eid d "Barth, Clayton") 169 | kevin (actor-name->eid d "Bacon, Kevin (I)") 170 | actor-name (partial eid->actor-name d)] 171 | (time (q '[:find ?actor ?m1 ?target 172 | :in $ % ?actor ?target 173 | :where (acted-with ?actor ?m1 _) 174 | (acted-with ?m1 ?target _)] 175 | d acted-with-rules clay kevin))) 176 | 177 | ;; query engine search (4 degrees) 178 | (let [d (-> system :db :conn db) 179 | clay (actor-name->eid d "Barth, Clayton") 180 | kevin (actor-name->eid d "Bacon, Kevin (I)") 181 | actor-name (partial eid->actor-name d)] 182 | (time (map (partial map actor-name) 183 | (q '[:find ?actor ?m1 ?m2 ?target 184 | :in $ % ?actor ?target 185 | :where (acted-with ?actor ?m1 _) 186 | (acted-with ?m1 ?m2 _) 187 | (acted-with ?m2 ?target _)] 188 | d acted-with-rules clay kevin)))) 189 | 190 | ;; using path from rule 191 | (let [d (-> system :db :conn db) 192 | clay (d/entid d [:person/name "Barth, Clayton"]) 193 | kevin (d/entid d [:person/name "Bacon, Kevin (I)"]) 194 | ename (partial actor-or-movie-name d)] 195 | (time (map first 196 | (q '[:find ?path 197 | :in $ % ?actor ?target 198 | :where 199 | (acted-with-3 ?actor ?target ?path)] 200 | d acted-with-rules clay kevin)))) 201 | 202 | ;; export movies and actors 203 | (let [d (-> system :db :conn db) 204 | reducer (fn [map [k v]] (assoc map k (conj (get map k []) v))) 205 | actor-map (->> (kevin.expunge/actor-names "data/movies-small.list.gz" d) 206 | (reduce reducer {}))] 207 | (spit "resources/sample.edn" (with-out-str (pr actor-map)))) 208 | 209 | 210 | ;; quick n dirty profiling 211 | (let [d (-> system :db :conn db) 212 | clay (d/entid d [:person/name "Barth, Clayton"]) 213 | kevin (d/entid d [:person/name "Bacon, Kevin (I)"]) 214 | neighbor-fn (partial neighbors d)] 215 | (time (dotimes [_ 50] 216 | (bidirectional-bfs clay kevin neighbor-fn)))) 217 | 218 | 219 | ;; Filter out documentaries 220 | (let [d (-> system :db :conn db) 221 | fd (d/filter d (without-documentaries d))] 222 | (q '[:find ?m ?t ?g 223 | :in $ ?t 224 | :where [?m :movie/title ?t] 225 | [?m :movie/genre ?g]] d "Going to Pieces: The Rise and Fall of the Slasher Film (2006)")) 226 | 227 | ;; Genres and counts 228 | 229 | (def genre-counts (let [d (-> system :db :conn db)] 230 | (q '[:find ?g (count ?e) 231 | :in $ 232 | :where 233 | [?e :movie/genre ?gid] 234 | [?gid :db/ident ?g]] d))) 235 | 236 | (pprint (map (fn [[g c]] [(name g) c]) (sort-by peek genre-counts))) 237 | 238 | (d/touch (d/entity (-> system :db :conn db) 17592191235171)) 239 | 240 | (deref (d/transact (-> system :db :conn) [[:db/add 17592191235171 :movie/genre 0]])) 241 | 242 | (d/q '[:find ?e ?a ?v 243 | :in ?log ?t1 ?t2 244 | :where [(tx-ids ?log ?t1 ?t2) [?tx ...]] 245 | [(tx-data ?log ?tx) [[?e ?a ?v]]]] 246 | (d/log (-> system :db :conn)) #inst "2014-11-11" #inst "2014-11-12") 247 | 248 | ) 249 | -------------------------------------------------------------------------------- /doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to kevin 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/great-documentation/what-to-write/) 4 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject kevin "0.1.2" 2 | :description "It's like Kevin Bacon is right here!" 3 | :url "http://imdb.com" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.7.0"] 7 | [org.clojure/tools.nrepl "0.2.10"] 8 | [org.clojure/core.async "0.1.303.0-886421-alpha"] 9 | [commons-codec "1.10"] 10 | [hiccup "1.0.5"] 11 | [enlive "1.1.6"] 12 | [environ "1.0.0"] 13 | [lib-noir "0.9.9" :exclusions [compojure clout com.fasterxml.jackson.core/jackson-core ring org.clojure/tools.reader org.clojure/core.cache]] 14 | [ring-server "0.4.0"] 15 | [ring "1.4.0"] 16 | [clj-time "0.11.0"] 17 | [compojure "1.4.0"]] 18 | :plugins [[lein-ring "0.9.6" :exclusions [org.clojure/clojure]] 19 | [lein-beanstalk "0.2.7" :exclusions [commons-codec org.clojure/clojure]]] 20 | :ring {:handler kevin.system/handler 21 | :init kevin.system/init 22 | :destroy kevin.system/destroy } 23 | :profiles {:uberjar 24 | {:ring {:open-browser? false :stacktraces? false :auto-reload? false} 25 | :dependencies [[com.datomic/datomic-pro "0.9.5206" :exclusions [joda-time]]]} 26 | :dev {:source-paths ["dev" "src"] 27 | :dependencies [[com.datomic/datomic-free "0.9.5206" :exclusions [joda-time]] 28 | [org.clojure/tools.namespace "0.2.11"] 29 | [org.clojure/java.classpath "0.2.2"] 30 | [javax.servlet/servlet-api "2.5"] 31 | [ring-mock "0.1.5"]]}} 32 | :jvm-opts ["-Xmx4g" "-server"] 33 | ) 34 | -------------------------------------------------------------------------------- /resources/genres.edn: -------------------------------------------------------------------------------- 1 | [ 2 | {:db/id #db/id[:db.part/db] 3 | :db/ident :movie/genre 4 | :db/valueType :db.type/ref 5 | :db/cardinality :db.cardinality/many 6 | :db/doc "A movie's genres" 7 | :db.install/_attribute :db.part/db} 8 | 9 | {:db/id #db/id[:db.part/db] 10 | :db/ident :movie.genre/action} 11 | 12 | {:db/id #db/id[:db.part/db] 13 | :db/ident :movie.genre/adult} 14 | 15 | {:db/id #db/id[:db.part/db] 16 | :db/ident :movie.genre/adventure} 17 | 18 | {:db/id #db/id[:db.part/db] 19 | :db/ident :movie.genre/animation} 20 | 21 | {:db/id #db/id[:db.part/db] 22 | :db/ident :movie.genre/comedy} 23 | 24 | {:db/id #db/id[:db.part/db] 25 | :db/ident :movie.genre/crime} 26 | 27 | {:db/id #db/id[:db.part/db] 28 | :db/ident :movie.genre/documentary} 29 | 30 | {:db/id #db/id[:db.part/db] 31 | :db/ident :movie.genre/drama} 32 | 33 | {:db/id #db/id[:db.part/db] 34 | :db/ident :movie.genre/family} 35 | 36 | {:db/id #db/id[:db.part/db] 37 | :db/ident :movie.genre/fantasy} 38 | 39 | {:db/id #db/id[:db.part/db] 40 | :db/ident :movie.genre/film-noir} 41 | 42 | {:db/id #db/id[:db.part/db] 43 | :db/ident :movie.genre/horror} 44 | 45 | {:db/id #db/id[:db.part/db] 46 | :db/ident :movie.genre/musical} 47 | 48 | {:db/id #db/id[:db.part/db] 49 | :db/ident :movie.genre/mystery} 50 | 51 | {:db/id #db/id[:db.part/db] 52 | :db/ident :movie.genre/romance} 53 | 54 | {:db/id #db/id[:db.part/db] 55 | :db/ident :movie.genre/sci-fi} 56 | 57 | {:db/id #db/id[:db.part/db] 58 | :db/ident :movie.genre/short} 59 | 60 | {:db/id #db/id[:db.part/db] 61 | :db/ident :movie.genre/thriller} 62 | 63 | {:db/id #db/id[:db.part/db] 64 | :db/ident :movie.genre/war} 65 | 66 | {:db/id #db/id[:db.part/db] 67 | :db/ident :movie.genre/western} 68 | ] 69 | -------------------------------------------------------------------------------- /resources/public/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jgdavey/kevin/b4263c932b733c4e0b7729573cdc4550e69ddfa2/resources/public/.gitkeep -------------------------------------------------------------------------------- /resources/schema.edn: -------------------------------------------------------------------------------- 1 | [ 2 | ;; movies 3 | {:db/id #db/id[:db.part/db] 4 | :db/ident :movie/title 5 | :db/valueType :db.type/string 6 | :db/cardinality :db.cardinality/one 7 | :db/fulltext true 8 | :db/unique :db.unique/identity 9 | :db/doc "A movie's title (upsertable)" 10 | :db.install/_attribute :db.part/db} 11 | 12 | {:db/id #db/id[:db.part/db] 13 | :db/ident :movie/year 14 | :db/valueType :db.type/long 15 | :db/cardinality :db.cardinality/one 16 | :db/doc "A movie's release year" 17 | :db.install/_attribute :db.part/db} 18 | 19 | ;; actors 20 | {:db/id #db/id[:db.part/db] 21 | :db/ident :person/name 22 | :db/valueType :db.type/string 23 | :db/cardinality :db.cardinality/one 24 | :db/fulltext true 25 | :db/unique :db.unique/identity 26 | :db/doc "An person's name (upsertable)" 27 | :db.install/_attribute :db.part/db} 28 | 29 | {:db/id #db/id[:db.part/db] 30 | :db/ident :actor/movies 31 | :db/valueType :db.type/ref 32 | :db/cardinality :db.cardinality/many 33 | :db/doc "An actor's ref to a movie" 34 | :db.install/_attribute :db.part/db} 35 | ] 36 | -------------------------------------------------------------------------------- /resources/templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jgdavey/kevin/b4263c932b733c4e0b7729573cdc4550e69ddfa2/resources/templates/.gitkeep -------------------------------------------------------------------------------- /src/kevin/core.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.core 2 | (:require [datomic.api :as d :refer [q db]] 3 | [clojure.string :refer [split join] :as str] 4 | [clojure.zip :as zip] 5 | [kevin.util :refer :all] 6 | [kevin.search :refer [bidirectional-bfs]]) 7 | (:import datomic.Datom)) 8 | 9 | (defprotocol Eid 10 | (e [_])) 11 | 12 | (extend-protocol Eid 13 | java.lang.Long 14 | (e [i] i) 15 | 16 | datomic.Entity 17 | (e [ent] (:db/id ent))) 18 | 19 | (def acted-with-rules 20 | '[[(acted-with ?e1 ?e2 ?path) 21 | [?e1 :actor/movies ?m] 22 | [?e2 :actor/movies ?m] 23 | [(!= ?e1 ?e2)] 24 | [(vector ?e1 ?m ?e2) ?path]] 25 | [(acted-with-1 ?e1 ?e2 ?path) 26 | (acted-with ?e1 ?e2 ?path)] 27 | [(acted-with-2 ?e1 ?e2 ?path) 28 | (acted-with ?e1 ?x ?pp) 29 | (acted-with ?x ?e2 ?p2) 30 | [(butlast ?pp) ?p1] 31 | [(concat ?p1 ?p2) ?path]] 32 | [(acted-with-3 ?e1 ?e2 ?path) 33 | (acted-with-2 ?e1 ?x ?pp) 34 | (acted-with ?x ?e2 ?p2) 35 | [(butlast ?pp) ?p1] 36 | [(concat ?p1 ?p2) ?path]] 37 | [(acted-with-4 ?e1 ?e2 ?path) 38 | (acted-with-3 ?e1 ?x ?pp) 39 | (acted-with ?x ?e2 ?p2) 40 | [(butlast ?pp) ?p1] 41 | [(concat ?p1 ?p2) ?path]]]) 42 | 43 | (defn actor-or-movie-name [db eid] 44 | (let [ent (d/entity db (e eid))] 45 | (or (:movie/title ent) (:person/name ent)))) 46 | 47 | (defn referring-to 48 | "Find all entities referring to an eid as a certain attribute." 49 | [db eid] 50 | (->> (d/datoms db :vaet (e eid)) 51 | (map :e))) 52 | 53 | (defn eids-with-attr-val 54 | "Return eids with a given attribute and value." 55 | [db attr val] 56 | (->> (d/datoms db :avet attr val) 57 | (map :e))) 58 | 59 | (defn eid->actor-name 60 | "db is database value 61 | name is the actor's name" 62 | [db eid] 63 | (-> (d/entity db (e eid)) 64 | :person/name)) 65 | 66 | (defn actor-search 67 | "Returns set with exact match, if found. Otherwise query will 68 | be formatted with format-query passed as-is to Lucene" 69 | [db query] 70 | (if (str/blank? query) 71 | #{} 72 | (if-let [eid (d/entid db [:person/name query])] 73 | [{:name query :actor-id eid}] 74 | (mapv #(zipmap [:actor-id :name] %) 75 | (q '[:find ?e ?name 76 | :in $ ?search 77 | :where [(fulltext $ :person/name ?search) [[?e ?name]]]] 78 | db (format-query query)))))) 79 | 80 | (defn movie-actors 81 | "Given a datomic database value and a movie id, 82 | returns ids for actors in that movie." 83 | [db eid] 84 | (map :e (d/datoms db :vaet eid :actor/movies))) 85 | 86 | (defn actor-movies 87 | "Given a datomic database value and an actor id, 88 | returns ids for movies that actor was in." 89 | [db eid] 90 | (map :v (d/datoms db :eavt eid :actor/movies))) 91 | 92 | (defn immediate-connections 93 | "d is database value 94 | eid is actor's entity id" 95 | [db eid] 96 | (->> (actor-movies db eid) 97 | (mapcat (partial referring-to db)))) 98 | 99 | (defn neighbors 100 | "db is database value 101 | eid is an actor or movie eid" 102 | [db eid] 103 | (or (seq (actor-movies db (e eid))) 104 | (seq (movie-actors db (e eid))))) 105 | 106 | (defn zipper 107 | "db is database value 108 | eid is actor's entity id" 109 | [db eid] 110 | (let [children (partial immediate-connections db) 111 | branch? (comp seq children) 112 | make-node (fn [_ c] c)] 113 | (zip/zipper branch? children make-node eid))) 114 | 115 | (defn search [db start end] 116 | (let [s (partial actor-search db) 117 | starts (s start) 118 | ends (s end)] 119 | (for [p1 starts, p2 ends] 120 | [p1 p2]))) 121 | 122 | (defn path-at-depth [db source target depth] 123 | (let [rule (symbol (str "acted-with-" depth))] 124 | (q (concat '[:find ?path 125 | :in $ % ?actor ?target 126 | :where] 127 | [(list rule '?actor '?target '?path)]) 128 | db acted-with-rules source target))) 129 | 130 | (defn ascending-years? [annotated-node] 131 | (if-let [years (->> annotated-node 132 | (map :year) 133 | (filter identity) 134 | seq)] 135 | (apply <= years) 136 | true)) 137 | 138 | (defn is-documentary? [entity] 139 | (let [genres (:movie/genre entity)] 140 | (and genres (contains? genres :movie.genre/documentary)))) 141 | 142 | (defn without-documentaries 143 | "Returns a function suitable for use with datomic.api/filter" 144 | [db] 145 | (let [movies-attr (d/entid db :actor/movies) 146 | has-documentaries? (fn [db ^Datom datom] 147 | (and (= movies-attr (.a datom)) 148 | (is-documentary? (d/entity db (.v datom)))))] 149 | (fn [db ^Datom datom] 150 | (not (or (has-documentaries? db datom) 151 | (is-documentary? (d/entity db (.e datom)))))))) 152 | 153 | (defn find-id-paths [db source target] 154 | (let [filt (without-documentaries db) 155 | fdb (d/filter db filt)] 156 | (bidirectional-bfs source target (partial neighbors fdb)))) 157 | 158 | (defn find-annotated-paths 159 | [db source target] 160 | (let [ename (partial actor-or-movie-name db) 161 | annotate-node (fn [node] 162 | (let [ent (d/entity db node)] 163 | {:type (if (:person/name ent) "actor" "movie") 164 | :year (:movie/year ent) 165 | :name (ename ent) 166 | :entity ent}))] 167 | (->> (find-id-paths db source target) 168 | (map (partial mapv annotate-node))))) 169 | 170 | (defn annotate-search [db search hard-mode] 171 | (let [[result1 result2] search 172 | paths (find-annotated-paths db (:actor-id result1) (:actor-id result2)) 173 | paths (if hard-mode 174 | (filter ascending-years? paths) 175 | paths) 176 | total (count paths) 177 | bacon-number (int (/ (-> paths first count) 2))] 178 | {:total total 179 | :paths paths 180 | :start (:name result1) 181 | :end (:name result2) 182 | :bacon-number bacon-number 183 | :hard-mode? hard-mode})) 184 | -------------------------------------------------------------------------------- /src/kevin/expunge.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.expunge 2 | (:require [datomic.api :as d :refer [q db]] 3 | [clojure.java.io :as io] 4 | [kevin.loader :refer [ensure-transformed-movies]])) 5 | 6 | (defn movie-titles 7 | "This is documentation" 8 | ([path] 9 | (let [cleaned-path (clojure.string/replace path ".gz" "") 10 | cleaned-path (clojure.string/replace cleaned-path ".list" ".transformed")] 11 | (ensure-transformed-movies path cleaned-path) 12 | (doall (line-seq (io/reader cleaned-path)))))) 13 | 14 | (defn actor-names [path db] 15 | (let [titles (movie-titles path)] 16 | (q '[:find ?name ?title 17 | :in $ [?title ...] 18 | :where [?m :movie/title ?title] 19 | [?a :actor/movies ?m] 20 | [?a :person/name ?name]] 21 | db titles))) 22 | -------------------------------------------------------------------------------- /src/kevin/handler.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.handler 2 | (:require [compojure.core :refer [defroutes routes]] 3 | [compojure.handler :as handler] 4 | [compojure.route :as route] 5 | [kevin.routes.home :refer [home-routes]])) 6 | 7 | (defroutes app-routes 8 | (route/resources "/") 9 | (route/not-found "Not Found")) 10 | 11 | (defn app [context] 12 | (-> (home-routes context) 13 | (routes app-routes) 14 | (handler/site))) 15 | 16 | -------------------------------------------------------------------------------- /src/kevin/loader.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.loader 2 | "To use, download the movies, actors and actresses lists from a mirror on 3 | http://www.imdb.com/interfaces, and copy them (still zipped) to the data 4 | folder. You can then run `lein run -m kevin.loader`" 5 | (:gen-class) 6 | (:require [clojure.java.io :as io] 7 | [clojure.core.async :as async :refer [chan go >! > (q '[:find ?t 58 | :where [?e :movie/title ?t]] 59 | (db conn)) 60 | (map first) 61 | (map add-year) 62 | (filter identity))] 63 | (doseq [batch (partition-all *batch-size* tx-data)] 64 | (print ".") 65 | (flush) 66 | @(d/transact conn batch)) 67 | :ok)) 68 | 69 | (defn movie-line? [^String line] 70 | (and 71 | (not (empty? line)) 72 | (not (.startsWith line char-quote)) ; Not a TV series 73 | (= -1 (.indexOf line "{{SUSPENDED}}")) ; Not bad data 74 | (= -1 (.indexOf line "(VG)")) ; Not a videogame 75 | (= -1 (.indexOf line "V)")))) ; Not TV movie or straight to video 76 | 77 | (defn role-line? [^String line] 78 | (and 79 | (movie-line? line) 80 | (not= -1 (.indexOf line ")")))) 81 | 82 | (defn legit-role? [^String line] 83 | (and 84 | (= -1 (.indexOf line "(archive footage)")) 85 | (= -1 (.indexOf line "(unconfirmed)")) 86 | (= -1 (.indexOf line "(archival")))) 87 | 88 | (defn movie-tx [^String title] 89 | (let [tx {:db/id (d/tempid :db.part/user) 90 | :movie/title title}] 91 | (if-let [year (extract-year title)] 92 | (assoc tx :movie/year year) 93 | tx))) 94 | 95 | (defn actor-movie-tx [actor-id title] 96 | {:movie/title title 97 | :db/id (d/tempid :db.part/user) 98 | :actor/_movies actor-id}) 99 | 100 | (defn actor-tx [tuples] 101 | (let [actor-id (d/tempid :db.part/user)] 102 | (concat [{:db/id actor-id :person/name (ffirst tuples)}] 103 | (map (fn [[_ movie]] 104 | (actor-movie-tx actor-id movie)) tuples)))) 105 | 106 | (defn retract-roles [d {:keys [actor movies]}] 107 | (->> (q '[:find ?actor ?movie 108 | :in $ ?name [?title ...] 109 | :where 110 | [?actor :person/name ?name] 111 | [?actor :actor/movies ?movie] 112 | [?movie :movie/title ?title]] 113 | d actor movies) 114 | (map (fn [[actor movie]] 115 | [:db/retract actor :actor/movies movie])))) 116 | 117 | (defn parse-genre [^String line] 118 | (map #(.trim ^String %) (clojure.string/split line #"\t+"))) 119 | 120 | (defn genre-tx [line] 121 | (let [[title genre] (parse-genre line)] 122 | (when-let [g (genres genre)] 123 | {:db/id (d/tempid :db.part/user) 124 | :movie/title title 125 | :movie/genre g}))) 126 | 127 | (defn extract-role [^String role-line] 128 | (let [paren (. role-line (indexOf ")"))] 129 | (.. role-line (substring 0 (inc paren)) trim))) 130 | 131 | (defn extract-potential-roles [[actor-line & role-lines]] 132 | (let [[actor title & rest] (clojure.string/split actor-line #"\t+")] 133 | {:actor actor 134 | :movies (map #(.trim ^String %) (conj role-lines title))})) 135 | 136 | (defn parse-actor [lines] 137 | (let [{actor :actor potential-roles :movies} (extract-potential-roles lines) 138 | roles (map extract-role (filter #(and (role-line? %) (legit-role? %)) potential-roles)) 139 | movies (filter identity roles)] 140 | (when (and (seq movies) actor) 141 | { :actor actor :movies movies }))) 142 | 143 | (defmacro ensure-transformed-file 144 | "in and out are bound for you" 145 | [[file outfile] & body] 146 | `(when-not (.exists (io/as-file ~outfile)) 147 | (with-open [~'in (io/reader 148 | (java.util.zip.GZIPInputStream. (io/input-stream ~file)) 149 | :encoding "ISO-8859-1") 150 | ~'out (io/writer ~outfile)] 151 | ~@body))) 152 | 153 | (defn ensure-transformed-movies [file outfile] 154 | (ensure-transformed-file [file outfile] 155 | (loop [[line & lines] (drop-while #(not= % "MOVIES LIST") (line-seq in))] 156 | (when line 157 | (when (movie-line? line) 158 | (when-let [title (movie-title line)] 159 | (doto out 160 | (.write title) 161 | (.newLine)))) 162 | (recur lines))))) 163 | 164 | (defn ensure-transformed-actors [file outfile & {:keys [start-at]}] 165 | (ensure-transformed-file [file outfile] 166 | (loop [lines (drop 3 (drop-while #(not= % start-at) (line-seq in)))] 167 | (let [[actor-lines lines] (split-with (complement empty?) (rest lines))] 168 | (when (seq actor-lines) 169 | (when-let [actor-data (try (parse-actor actor-lines) (catch Throwable t nil))] 170 | (let [{:keys [actor movies]} actor-data] 171 | (doseq [movie movies] 172 | (doto out 173 | (.write actor) 174 | (.write char-tab) 175 | (.write movie) 176 | (.newLine))))) 177 | (recur lines)))))) 178 | 179 | (defn ensure-transformed-genres [file outfile] 180 | (ensure-transformed-file [file outfile] 181 | (loop [[line & lines] (drop 3 (drop-while #(not= % "8: THE GENRES LIST") (line-seq in)))] 182 | (when line 183 | (when (movie-line? line) 184 | (let [[title genre] (parse-genre line)] 185 | (doto out 186 | (.write title) 187 | (.write char-tab) 188 | (.write genre) 189 | (.newLine)))) 190 | (recur lines))))) 191 | 192 | (defn batch 193 | "Returns a channel that batches entries from in" 194 | [in timeout-ms] 195 | (let [inner (chan 1) 196 | splitter? (partial identical? ::split) 197 | proc (go (loop [t (async/timeout timeout-ms)] 198 | (let [[v c] (async/alts! [t in])] 199 | (condp identical? c 200 | t (do (>! inner ::split) 201 | (recur (async/timeout timeout-ms))) 202 | in (if (nil? v) 203 | (close! inner) 204 | (do (>! inner v) 205 | (recur t))))))) 206 | out (->> (async/partition-by splitter? inner) 207 | (async/remove< (comp splitter? first)))] 208 | out)) 209 | 210 | (defn transact-all 211 | "Returns a chan" 212 | [tx-chan transact n] 213 | (let [procs (map (fn [_] (go (loop [] 214 | (when-let [batch (! tx-data-chan (movie-tx line)) 233 | (recur)) 234 | (close! tx-data-chan))))) 235 | (with-open [file (io/reader "data/movies.transformed")] 236 | (doseq [line (line-seq file)] 237 | (async/>!! work-chan line))) 238 | (close! work-chan) 239 | (async/! tx-chan (actor-tx lines)) 255 | (recur)) 256 | (close! tx-chan))))) 257 | (with-open [file (io/reader file)] 258 | (doseq [line (line-seq file)] 259 | (async/>!! line-chan (clojure.string/split line #"\t+")))) 260 | (close! line-chan) 261 | (async/! tx-data-chan tx)) 280 | (recur)) 281 | (close! tx-data-chan))))) 282 | (with-open [file (io/reader "data/genres.transformed")] 283 | (doseq [line (line-seq file)] 284 | (async/>!! work-chan line))) 285 | (close! work-chan) 286 | (async/ context :db :conn d/db) 21 | search (s/search db person1 person2) 22 | hard-mode? (boolean (seq hard-mode))] 23 | (if (= 1 (count search)) 24 | (cache! (cache-key search hard-mode?) 25 | (views/results-page (s/annotate-search db (first search) hard-mode?))) 26 | (views/disambiguate search params)))) 27 | 28 | (defn home-routes [context] 29 | (routes 30 | (HEAD "/" [] "") ;; heartbeat response 31 | (GET "/" [] (home)) 32 | (GET "/search" {params :params} (search context params)))) 33 | -------------------------------------------------------------------------------- /src/kevin/search.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.search) 2 | 3 | (defn bfs 4 | "bread-first search, one-directional" 5 | [start end neighbor-fn] 6 | (let [queue (conj clojure.lang.PersistentQueue/EMPTY [start]) 7 | visited #{start} 8 | end-neighbors (neighbor-fn end) 9 | found? (fn [n] (some #{n} end-neighbors))] 10 | (loop [q queue 11 | v visited 12 | i 0] 13 | (when (seq q) 14 | (let [path (peek q) 15 | node (last path)] 16 | (if (found? node) 17 | (do 18 | (println "Finished in " i " iterations") 19 | (conj path end)) 20 | (let [neighbors (remove v (neighbor-fn node)) 21 | paths (map (partial conj path) neighbors)] 22 | (recur (into (pop q) paths) (into v neighbors) (inc i))))))))) 23 | 24 | (defn paths 25 | "Returns a lazy seq of all non-looping path vectors starting with 26 | []" 27 | [nodes-fn path] 28 | (let [this-node (peek path)] 29 | (->> (nodes-fn this-node) 30 | (filter #(not-any? (fn [edge] (= edge [this-node %])) 31 | (partition 2 1 path))) 32 | (mapcat #(paths nodes-fn (conj path %))) 33 | (cons path)))) 34 | 35 | (defn degrees-of-separation 36 | [start neighbor-fn & {:keys [up-to] :or {up-to 13}}] 37 | (loop [q #{start} 38 | distances {0 q} 39 | i 1] 40 | (if (< i up-to) ; Has anyone really been far even? 41 | (let [visited (vals distances) 42 | next-q (set (flatten (for [node q 43 | neighbor (neighbor-fn node) 44 | :when (not-any? #(contains? % neighbor) visited)] 45 | neighbor)))] 46 | (recur next-q (assoc distances i next-q) (inc i))) 47 | distances))) 48 | 49 | (defn trace-paths [m start] 50 | (remove #(m (peek %)) (paths m [start]))) 51 | 52 | (defn- find-paths [from-map to-map matches] 53 | (for [n matches 54 | from (map reverse (trace-paths from-map n)) 55 | to (map rest (trace-paths to-map n))] 56 | (vec (concat from to)))) 57 | 58 | (defn- neighbor-pairs [neighbors q coll] 59 | (for [node q 60 | nbr (neighbors node) 61 | :when (not (contains? coll nbr))] 62 | [nbr node])) 63 | 64 | (defn bidirectional-bfs [start end neighbors] 65 | (let [find-pairs (partial neighbor-pairs neighbors) 66 | overlaps (fn [coll q] (seq (filter #(contains? coll %) q))) 67 | map-set-pairs (fn [map pairs] 68 | (persistent! (reduce (fn [map [key val]] 69 | (assoc! map key (conj (get map key #{}) val))) 70 | (transient map) pairs)))] 71 | (loop [preds {start nil} ; map of outgoing nodes to where they came from 72 | succs {end nil} ; map of incoming nodes to where they came from 73 | q1 (list start) ; queue of outgoing things to check 74 | q2 (list end) ; queue of incoming things to check 75 | iter 1] 76 | (when (and (seq q1) (seq q2) (< iter 13)) ; 6 "hops" or fewer 77 | (if (<= (count q1) (count q2)) 78 | (let [pairs (find-pairs q1 preds) 79 | preds (map-set-pairs preds pairs) 80 | q1 (map first pairs)] 81 | (if-let [all (overlaps succs q1)] 82 | (find-paths preds succs (set all)) 83 | (recur preds succs q1 q2 (inc iter)))) 84 | (let [pairs (find-pairs q2 succs) 85 | succs (map-set-pairs succs pairs) 86 | q2 (map first pairs)] 87 | (if-let [all (overlaps preds q2)] 88 | (find-paths preds succs (set all)) 89 | (recur preds succs q1 q2 (inc iter))))))))) 90 | -------------------------------------------------------------------------------- /src/kevin/system.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.system 2 | (:require [datomic.api :as d] 3 | [kevin.handler :as handler] 4 | [clojure.tools.nrepl.server :as repl] 5 | [environ.core :refer [env]] 6 | [ring.server.standalone :refer (serve)])) 7 | 8 | (defn- ensure-schema [conn] 9 | (or (-> conn d/db (d/entid :person/name)) 10 | @(d/transact conn (read-string (slurp "resources/schema.edn")))) 11 | (or (-> conn d/db (d/entid :movie/genre)) 12 | @(d/transact conn (read-string (slurp "resources/genres.edn"))))) 13 | 14 | (defn- ensure-db [db-uri] 15 | (let [newdb? (d/create-database db-uri) 16 | conn (d/connect db-uri)] 17 | (ensure-schema conn) 18 | conn)) 19 | 20 | (defn start-db [system] 21 | (let [db (:db system) 22 | conn (ensure-db (:uri db))] 23 | (assoc-in system [:db :conn] conn))) 24 | 25 | (defn- stop-db [system] 26 | (when-let [conn (:conn (:db system))] 27 | (d/release conn)) 28 | (assoc-in system [:db :conn] nil)) 29 | 30 | (defn- start-repl [system] 31 | (let [repl-server (repl/start-server :port (get-in system [:repl :port]))] 32 | (assoc-in system [:repl :server] repl-server))) 33 | 34 | (defn- stop-repl [system] 35 | (when-let [repl-server (get-in system [:repl :server])] 36 | (repl/stop-server repl-server)) 37 | (assoc-in system [:repl :server] nil)) 38 | 39 | (defn- setup-handler [system] 40 | (let [web-opts (:web system) 41 | handler (handler/app system)] 42 | (assoc-in system [:web :handler] handler))) 43 | 44 | (defn- teardown-handler [system] 45 | (assoc-in system [:web :handler] nil)) 46 | 47 | (defn system 48 | "Returns a new instance of the whole application." 49 | [] 50 | {:db {:uri (env :datomic-db-url)} 51 | :web {:open-browser? false} 52 | :repl {:port 7888}}) 53 | 54 | (defn start 55 | "Performs side effects to initialize the system, acquire resources, 56 | and start it running. Returns an updated instance of the system." 57 | [system] 58 | (-> system 59 | start-db 60 | start-repl 61 | setup-handler)) 62 | 63 | (defn stop 64 | "Performs side effects to shut down the system and release its 65 | resources. Returns an updated instance of the system." 66 | [system] 67 | (when system 68 | (-> system 69 | teardown-handler 70 | stop-repl 71 | stop-db))) 72 | 73 | ;; external ring handlers 74 | (defonce sys nil) 75 | (defonce handler nil) 76 | 77 | (defn destroy [] 78 | (let [s (stop sys)] 79 | (alter-var-root #'sys nil) 80 | (alter-var-root #'handler nil))) 81 | 82 | (defn init [] 83 | (let [s (start (system))] 84 | (alter-var-root #'sys (constantly s)) 85 | (alter-var-root #'handler (constantly (get-in s [:web :handler]))) 86 | s)) 87 | -------------------------------------------------------------------------------- /src/kevin/util.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.util 2 | (:require [clojure.string :refer [split join] :as str]) 3 | (:import [com.datomic.lucene.queryParser QueryParser])) 4 | 5 | (defn- tokenize-query [q] 6 | (let [escaped (QueryParser/escape q)] 7 | (if (= q escaped) 8 | (str "+" escaped "*") 9 | (str "+" escaped)))) 10 | 11 | (defn format-query 12 | "Makes each word of query required, front-stemmed. 13 | Escapes all special characters. 14 | 15 | (format-query \"Foo bar\") 16 | ;=> \"+Foo* +bar*\" 17 | 18 | This maps to Lucene's QueryParser.parse 19 | See http://lucene.apache.org/core/3_6_1/api/core/org/apache/lucene/queryParser/QueryParser.html" 20 | [query] 21 | (->> (split query #",?\s+") 22 | (remove str/blank?) 23 | (map tokenize-query) 24 | (join " "))) 25 | 26 | (defn format-name [name] 27 | (str/replace-first name #"^([^,]+), (.+?)( \([IVX]+\))?$" "$2 $1$3")) 28 | -------------------------------------------------------------------------------- /src/kevin/views.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.views 2 | (:require [hiccup.util :refer [url]] 3 | [clojure.string :as str] 4 | [kevin.util :refer :all] 5 | [net.cgrand.enlive-html :as html 6 | :refer [defsnippet deftemplate do-> content clone-for 7 | substitute set-attr nth-of-type first-of-type 8 | last-of-type]])) 9 | 10 | (defn- form-input [nth] 11 | [[:fieldset (nth-of-type 1)] :dl [:dd (nth-of-type nth)] :input]) 12 | 13 | (defn emit [form] 14 | (if (some map? form) 15 | (html/emit* form) 16 | form)) 17 | 18 | (defn render [& args] 19 | (apply str (flatten (map emit args)))) 20 | 21 | (deftemplate main-template "templates/index.html" 22 | [& {:keys [body title]}] 23 | [:head :title] (content (str/join " - " 24 | (filter identity [title "Kevin Bacon"]))) 25 | [:#container :> #{:h1 :h2} :a] (set-attr :href "/") 26 | [:#container :ul] (substitute body)) 27 | 28 | (defsnippet form "templates/search_form.html" [:#container :form] 29 | [person1 person2 & args] 30 | [:form] (do-> 31 | (set-attr :action "search") 32 | (set-attr :method "GET")) 33 | (form-input 1) (do-> 34 | (set-attr :name "person1") 35 | (set-attr :value person1)) 36 | (form-input 2) (do-> 37 | (set-attr :name "person2") 38 | (set-attr :value person2))) 39 | 40 | (defsnippet possibility "templates/disambiguate.html" [:#disambiguate [:li (nth-of-type 1)]] 41 | [[person1 person2]] 42 | [:a] (set-attr :href (str (url "/search" {:person1 (:name person1) :person2 (:name person2)}))) 43 | [:a :> html/text-node] (html/wrap :rel) 44 | [:a :> [:rel html/first-child]] (content (format-name (:name person1))) 45 | [:a :> [:rel html/last-child]] (content (format-name (:name person2))) 46 | [:a :> :rel] html/unwrap) 47 | 48 | (defsnippet possibilities "templates/disambiguate.html" [:#disambiguate] 49 | [pairs] 50 | [:ul] (content (map possibility pairs))) 51 | 52 | (def ^:dynamic *result-sel* [:.result_list :> [:ul (nth-of-type 1)] :> [:li (nth-of-type 1)]]) 53 | 54 | (defn simple-escape [^String text] 55 | (.. text (replace " " "+") (replace "&" "%26"))) 56 | 57 | (defn- imdb-link [{:keys [name type]}] 58 | (str "http://imdb.com/find?exact=" 59 | (= type "movie") 60 | "&q=" 61 | (simple-escape name))) 62 | 63 | (defn- display-name [{:keys [name type]}] 64 | (if (= type "actor") 65 | (format-name name) 66 | name)) 67 | 68 | (defsnippet result-node "templates/results.html" (conj *result-sel* [:li (nth-of-type 1)]) 69 | [node] 70 | [:li] (set-attr :class (:type node)) 71 | [:li :a] (do-> 72 | (set-attr :href (imdb-link node)) 73 | (content (display-name node)))) 74 | 75 | (defsnippet result "templates/results.html" *result-sel* 76 | [path] 77 | [:ul] (content (map result-node path))) 78 | 79 | (defn degrees-description [bacon-number] 80 | (let [degrees (str bacon-number " degree" (when-not (= 1 bacon-number) "s"))] 81 | (str "(with " degrees " of separation)"))) 82 | 83 | (defsnippet results "templates/results.html" [:#results] 84 | [{:keys [paths total start end bacon-number]}] 85 | [:.result_list :> [:ul (html/but first-of-type)]] nil 86 | [:.result_list :> [:ul first-of-type]] (clone-for [path paths] 87 | (content (result path))) 88 | [:.result_list :> :h3] (content (str (count paths) " path" (when-not (= 1 (count paths)) "s"))) 89 | [:.result_list :> :h4] (content (degrees-description bacon-number)) 90 | [:.bacon_number :mark] (content (str bacon-number)) 91 | [:.bacon_number [:p first-of-type]] (content (format-name start)) 92 | [:.bacon_number [:p last-of-type]] (content (format-name end))) 93 | 94 | (defn disambiguate [pairs {:keys [hard-mode person1 person2]}] 95 | (main-template :title "Did you mean one of these?" 96 | :body [(if (seq pairs) 97 | (possibilities pairs) 98 | (html/html [:h2 "Oops. We couldn't find one of those actors"])) 99 | (html/html [:h2 "Try a new search"]) 100 | (form person1 person2 hard-mode)])) 101 | 102 | (defn results-page [{:keys [paths start end] :as res}] 103 | (main-template :body (if (seq paths) 104 | (results res) 105 | "Not linkable in 5 hops or fewer") 106 | :title (str "From " start " to " end))) 107 | -------------------------------------------------------------------------------- /test/kevin/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.core-test 2 | (:use [clojure.test] 3 | [kevin.core])) 4 | 5 | (deftest ascending-years 6 | (is (ascending-years? [{:year 2010} {:year 2011}])) 7 | (is (ascending-years? [{:year 2010} {:year nil} {:year 2011}])) 8 | (is (ascending-years? [{:year nil}])) 9 | (is (ascending-years? [{:year 2012} {:year 2012}])) 10 | (is (ascending-years? [{:year 2010} {:year 2011} {:year 2011}])) 11 | (is (not (ascending-years? [{:year 2012} {:year 2011}]))) 12 | (is (not (ascending-years? [{:year 2011} {:year 2011} {:year nil} {:year 2010}])))) 13 | -------------------------------------------------------------------------------- /test/kevin/loader_test.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.loader-test 2 | (:require [clojure.test :refer :all] 3 | [kevin.loader :refer :all])) 4 | 5 | (deftest extract-years-test 6 | (is (= (extract-year "Knocked Up (2007)") 7 | 2007)) 8 | (is (= (extract-year "Die Hard 2 (1990)") 9 | 1990)) 10 | (is (= (extract-year "2001: A Space Odyssey (1968)") 11 | 1968)) 12 | (is (= (extract-year "Closer (2004/I)") 13 | 2004))) 14 | 15 | (deftest parsing 16 | (is (= ["Scanner Darkly, A (2006)" 17 | "Mad Dog and Glory (1993)"] 18 | (mapv movie-title 19 | (filter movie-line? ["2003 MTV Movie Awards (2003) (TV) 2003" 20 | "Comic Books & Superheroes (2001) (V) 2001" 21 | "Scanner Darkly, A (2006) 2006" 22 | "Mad Dog and Glory (1993) 1993"]))))) 23 | 24 | -------------------------------------------------------------------------------- /test/kevin/search_test.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.search-test 2 | (:require [clojure.test :refer :all] 3 | [kevin.search :refer :all])) 4 | 5 | (deftest tracing-paths 6 | (testing "trace-paths" 7 | (is (= (trace-paths {:a nil} :a) 8 | [[:a]])) 9 | (is (= (trace-paths {:a #{:b} 10 | :b nil} :a) 11 | [[:a :b]])) 12 | (is (= (trace-paths {:a #{:b :c} 13 | :b nil 14 | :c #{:b}} :a) 15 | [[:a :c :b] [:a :b]])) 16 | (is (= (trace-paths {:a #{:b} 17 | :b #{:c :d} 18 | :c #{:d} 19 | :d nil} :a) 20 | [[:a :b :c :d] [:a :b :d]])) 21 | )) 22 | -------------------------------------------------------------------------------- /test/kevin/util_test.clj: -------------------------------------------------------------------------------- 1 | (ns kevin.util-test 2 | (:use [clojure.test] 3 | [kevin.util])) 4 | 5 | (deftest test-format-query 6 | (is (= "+J* +Digg*" (format-query "J Digg"))) 7 | (is (= "+J* +Digg* +\\(I\\)" (format-query "J Digg (I)")))) 8 | 9 | (deftest test-format-name 10 | (are [x y] (= (format-name x) y) 11 | "Wayne, John" "John Wayne" 12 | "Wayne, John (I)" "John Wayne (I)" 13 | "Shakira" "Shakira" 14 | "Watts, J (I) W" "J (I) W Watts" 15 | "Wayne John, Juan Wayne (XII)" "Juan Wayne Wayne John (XII)")) 16 | --------------------------------------------------------------------------------