├── .github └── FUNDING.yml ├── .gitignore ├── .replit ├── Makefile ├── README.md ├── anomaly_detection ├── .gitignore ├── LICENSE ├── README.md ├── data │ └── cleaned_wisconsin_cancer_data.csv ├── doc │ └── intro.md ├── project.clj ├── src-java │ └── main │ │ └── java │ │ └── com │ │ └── markwatson │ │ └── anomaly_detection │ │ └── AnomalyDetection.java ├── src │ └── anomaly_detection_clj │ │ └── core.clj └── test │ └── anomaly_detection_clj │ └── core_test.clj ├── brave_search ├── .gitignore ├── LICENSE ├── README.md ├── doc │ └── intro.md ├── project.clj ├── src │ └── brave_search │ │ └── core.clj └── test │ └── brave_search │ └── core_test.clj ├── datomic_local └── README.md ├── deeplearning_dl4j ├── .gitignore ├── LICENSE ├── README.md ├── data │ ├── cleaned_wisconsin_cancer_data.csv │ ├── testing.csv │ └── training.csv ├── project.clj └── src │ └── deeplearning_dl4j_clj │ └── wisconsin_data.clj ├── docs_qa ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── data │ ├── chemistry.txt │ ├── economics.txt │ ├── health.txt │ └── sports.txt ├── doc │ └── intro.md ├── project.clj ├── src │ └── docs_qa │ │ ├── core.clj │ │ └── vectordb.clj └── test │ └── docs_qa │ └── core_test.clj ├── gemini_api ├── .gitignore ├── LICENSE ├── README.md ├── deps.edn ├── doc │ └── intro.md ├── project.clj ├── src │ └── gemini_api │ │ └── core.clj └── test │ └── gemini_api │ └── core_test.clj ├── knowledge_graph_navigator ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── dbpedia_sample.nt ├── doc │ └── intro.md ├── entities_by_name.sparql ├── project.clj ├── relationships.sparql ├── src-java │ └── main │ │ ├── java │ │ └── com │ │ │ └── markwatson │ │ │ └── semanticweb │ │ │ ├── Cache.java │ │ │ ├── JenaApis.java │ │ │ └── QueryResult.java │ │ └── resources │ │ └── log4j.xml ├── src │ ├── knowledge_graph_navigator_clj │ │ ├── entities_by_name.clj │ │ ├── kgn.clj │ │ ├── relationships.clj │ │ ├── sparql.clj │ │ └── sparql_utils.clj │ └── semantic_web_jena_clj │ │ 
└── core.clj └── test │ └── knowledge_graph_navigator_clj │ └── kgn_test.clj ├── llm_bosquet ├── .gitignore ├── Makefile ├── README.md ├── config.edn ├── deps.edn ├── secrets.edn ├── src │ └── llm_bosquet │ │ └── core.clj └── test │ └── llm_bosquet │ └── core_test.clj ├── nlp_libpython ├── .gitignore ├── CHANGELOG.md ├── INSTALL_MLW.txt ├── LICENSE ├── QA.py ├── README.md ├── doc │ └── intro.md ├── get_entity_text.sparql ├── project.clj ├── src │ ├── knowledge_graph_navigator_clj │ │ ├── entity_text_by_uri.clj │ │ ├── sparql.clj │ │ └── sparql_utils.clj │ └── nlp_libpython_spacy │ │ └── core.clj └── test │ └── nlp_libpython_spacy │ └── core_test.clj ├── nlp_opennlp ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── doc │ └── intro.md ├── models │ ├── en-ner-location.bin │ ├── en-ner-organization.bin │ ├── en-ner-person.bin │ ├── en-newscat.bin │ ├── en-pos-maxent.bin │ ├── en-sent.bin │ └── en-token.bin ├── project.clj ├── src-java │ └── main │ │ ├── java │ │ └── com │ │ │ └── markwatson │ │ │ └── opennlp │ │ │ ├── NLP.java │ │ │ └── Pair.java │ │ └── resources │ │ └── log4j.xml ├── src │ └── opennlp_clj │ │ └── core.clj └── test │ └── opennlp_clj │ └── core_test.clj ├── old_code └── openai_api_old │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── doc │ └── intro.md │ ├── project.clj │ ├── src │ └── openai_api │ │ └── core.clj │ └── test │ └── openai_api │ └── core_test.clj ├── ollama ├── .gitignore ├── LICENSE ├── README.md ├── doc │ └── intro.md ├── project.clj ├── src │ └── ollama_api │ │ └── core.clj └── test │ └── ollama_api │ └── core_test.clj ├── openai_api ├── .gitignore ├── LICENSE ├── README.md ├── doc │ └── intro.md ├── project.clj ├── src │ └── openai_api │ │ └── core.clj └── test │ └── openai_api │ └── core_test.clj ├── replit.nix ├── semantic_web_jena ├── .gitignore ├── LICENSE ├── README.md ├── data │ ├── business.sql │ ├── foaf.nt │ ├── inferencing_1.txt │ ├── inferencing_2.txt │ ├── inferencing_3.txt │ ├── inferencing_4.txt │ ├── 
news.n3 │ ├── news.nt │ ├── rdfs_business.n3 │ ├── rdfs_business.nt │ ├── rdfs_sample_1.n3 │ ├── rdfs_sample_1.owl │ ├── rdfs_sample_2.n3 │ ├── rdfs_sample_2.owl │ ├── sample_news.n3 │ ├── sample_news.nt │ ├── sparql_ask_test.txt │ ├── sparql_combine_rdfs_test.txt │ ├── sparql_combine_test.txt │ ├── sparql_construct_test.txt │ ├── sparql_describe_test.txt │ └── sparql_select_test.txt ├── doc │ └── intro.md ├── project.clj ├── src-java │ └── main │ │ ├── java │ │ └── com │ │ │ └── markwatson │ │ │ └── semanticweb │ │ │ ├── Cache.java │ │ │ ├── JenaApis.java │ │ │ └── QueryResult.java │ │ └── resources │ │ └── log4j.xml ├── src │ └── semantic_web_jena_clj │ │ └── core.clj └── test │ └── semantic_web_jena_clj │ └── core_test.clj ├── simple_rdf_sparql ├── .gitignore ├── LICENSE ├── README.md ├── doc │ └── intro.md ├── project.clj ├── src │ └── simple_rdf_sparql │ │ └── core.clj └── test │ └── simple_rdf_sparql │ └── core_test.clj └── webscraping ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── project.clj ├── src └── webscraping │ └── core.clj └── test └── webscraping └── core_test.clj /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: mark-watson # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | markwatson: # Replace with a single Patreon username 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | *.iml 3 | .idea 4 | .lein-failures 5 | .clj-kondo 6 | .DS_Store 7 | *.lsp 8 | .lsp 9 | *.db 10 | .cpcache 11 | *.cpcache 12 | *.log 13 | .classpath 14 | .project 15 | .settings 16 | .calva 17 | .vscode 18 | perplexity 19 | #*# 20 | 21 | -------------------------------------------------------------------------------- /.replit: 
-------------------------------------------------------------------------------- 1 | run = "clojure -M main.clj" 2 | 3 | entrypoint = "main.clj" 4 | 5 | [env] 6 | CLJ_CONFIG = "/home/runner/.clojure" 7 | 8 | [languages.clojure] 9 | pattern = "**/*.clj" 10 | 11 | [languages.clojure.languageServer] 12 | start = ["clojure-lsp"] 13 | 14 | [nix] 15 | channel = "stable-22_05" 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | rm -rf */target 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code for my book "Practical Artificial Intelligence Programming With Clojure" 2 | 3 | You can read all of my books for free on my 4 | website [https://markwatson.com](https://markwatson.com). 5 | 6 | If you would like to support my work please consider purchasing my books on [Leanpub](https://leanpub.com/u/markwatson) and star my git repositories that you find useful on [GitHub](https://github.com/mark-watson?tab=repositories&q=&type=public). You can also interact with me on social media on [Mastodon](https://mastodon.social/@mark_watson) and [Twitter](https://twitter.com/mark_l_watson). 
7 | 8 | -------------------------------------------------------------------------------- /anomaly_detection/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | -------------------------------------------------------------------------------- /anomaly_detection/README.md: -------------------------------------------------------------------------------- 1 | # anomaly_detection_clj 2 | 3 | # Code for my book "Practical Artificial Intelligence Programming With Clojure" 4 | 5 | You can read my eBooks for free on my 6 | website [https://markwatson.com](https://markwatson.com). If you would like to pay me for a copy then please visit [https://leanpub.com/clojureai](https://leanpub.com/clojureai). 7 | 8 | ## License 9 | 10 | Copyright © 2021 Mark Watson 11 | 12 | This program and the accompanying materials are made available under the 13 | terms of the Eclipse Public License 2.0 which is available at 14 | http://www.eclipse.org/legal/epl-2.0. 15 | 16 | This Source Code may also be made available under the following Secondary 17 | Licenses when the conditions for such availability set forth in the Eclipse 18 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 19 | the Free Software Foundation, either version 2 of the License, or (at your 20 | option) any later version, with the GNU Classpath Exception which is available 21 | at https://www.gnu.org/software/classpath/license.html. 
;; Leiningen build definition for the anomaly detection example.
;; Clojure sources are read from "src" and Java sources from "src-java".
(defproject anomaly_detection_clj "0.1.0-SNAPSHOT"
  :description "Example of Clojure using Java Anomaly Detection code"
  :url "https://markwatson.com"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url "https://www.eclipse.org/legal/epl-2.0/"}
  :dependencies [[org.clojure/clojure "1.11.1"]
                 ;; NOTE(review): the external Java artifact below is disabled;
                 ;; presumably the class under src-java replaces it -- confirm.
                 ;;[com.markwatson/anomaly_detection "1.0-SNAPSHOT"]
                 [org.apache.commons/commons-io "1.3.2"]
                 [org.clojure/data.csv "1.0.1"]
                 [incanter "1.9.3"]]
  :source-paths      ["src"]
  :java-source-paths ["src-java"]
  ;; Java sources are compiled with source/target level 1.8.
  :javac-options     ["-target" "1.8" "-source" "1.8"]
  ;; The main namespace is not AOT-compiled except in the uberjar profile.
  :main ^:skip-aot anomaly-detection-clj.core
  :target-path "target/%s"
  :profiles {:uberjar {:aot :all
                       :jvm-opts ["-Dclojure.compiler.direct-linking=true"]}})
(defn testAD
  "Train the Java AnomalyDetection model on the Wisconsin cancer data and
   classify one hand-written malignant sample and one hand-written benign
   sample.

   Reads data/cleaned_wisconsin_cancer_data.csv (relative to the working
   directory), passes every row through data->gausian, plots one histogram
   per feature column when GENERATE_PLOTS is truthy, trains the detector,
   prints both results and returns them as a map with the keys
   :malignant-result and :benign-result."
  []
  (let [cdata (map
               data->gausian
               (with-open [reader (io/reader "data/cleaned_wisconsin_cancer_data.csv")]
                 ;; doall realizes the lazy CSV rows before the reader is closed
                 (doall (csv/read-csv reader))))]
    (when GENERATE_PLOTS
      ;; one histogram per feature column, in column order 0..8
      (doseq [[column title] (map-indexed vector
                                          ["Clump Thickness"
                                           "Uniformity of Cell Size"
                                           "Uniformity of Cell Shape"
                                           "Marginal Adhesion"
                                           "Single Epithelial Cell Size"
                                           "Bare Nuclei"
                                           "Bland Chromatin"
                                           "Normal Nucleoli"
                                           "Mitoses"])]
        (print-histogram title cdata column)))
    ;; get best model parameters:
    (let [java-cdata (into-array (map double-array cdata))
          ;; 10 columns per row: the scaled feature values followed by the
          ;; target output that data->gausian appends as the last element
          detector (AnomalyDetection. 10 (- (count cdata) 1) java-cdata)]
      (.train detector)
      (let [test_malignant (double-array [0.5 1 1 0.8 0.5 0.5 0.7 1 0.1])
            test_benign (double-array [0.5 0.4 0.5 0.1 0.8 0.1 0.3 0.6 0.1])
            ;; isAnamoly is the (misspelled) method name exposed by the Java class
            malignant_result (.isAnamoly detector test_malignant)
            benign_result (.isAnamoly detector test_benign)]
        (if malignant_result
          (println "malignant_result true")
          (println "malignant_result false"))
        (if benign_result
          (println "benign_result true")
          (println "benign_result false"))
        {:malignant-result malignant_result
         :benign-result benign_result}))))
;; Runs the full train-and-classify example once and checks both results.
(deftest a-test
  (testing "Test anomaly detection results"
    (let [{:keys [malignant-result benign-result]} (testAD)]
      ;; the malignant sample must be flagged, the benign one must not
      (is (= true malignant-result))
      (is (= false benign-result)))))
;; Leiningen build definition for the Brave search example.
;; NOTE(review): the project name "openai_api" looks like a leftover from the
;; project this file was copied from; it is kept so the artifact name does
;; not change -- consider renaming it to brave_search.
(defproject openai_api "0.1.0-SNAPSHOT"
  :description "Clojure client for the Brave web search API"
  :url "https://markwatson.com"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url "https://www.eclipse.org/legal/epl-2.0/"}
  :dependencies [[org.clojure/clojure "1.10.1"]
                 [clj-http "3.12.3"]
                 ;; brave-search.core requires cheshire.core directly, so it is
                 ;; declared here instead of relying on a transitive dependency
                 [cheshire "5.11.0"]
                 [net.clojars.wkok/openai-clojure "0.15.0"]
                 [org.clojure/data.json "2.3.1"]
                 ]
  ;; start the REPL in the namespace that actually exists in this project
  :repl-options {:init-ns brave-search.core})
(defn brave-search
  "Run a web search for `query` with the Brave Search API.

   The API key is read from the environment variable BRAVE_SEARCH_API_KEY and
   sent in the X-Subscription-Token header.

   Returns a vector holding one [title url description] vector for each entry
   found under :web :results in the parsed JSON response body; the vector is
   empty when the response has no such entries.

   Throws ex-info when BRAVE_SEARCH_API_KEY is unset or empty. Exceptions
   raised by the HTTP call or by JSON parsing are not caught here."
  [query]
  (let [subscription-key (System/getenv "BRAVE_SEARCH_API_KEY")]
    ;; fail early with a clear message instead of sending a request that
    ;; carries no subscription token
    (when (empty? subscription-key)
      (throw (ex-info "The environment variable BRAVE_SEARCH_API_KEY is not set"
                      {:env-var "BRAVE_SEARCH_API_KEY"})))
    (let [endpoint "https://api.search.brave.com/res/v1/web/search"
          ;; Call the API
          response (client/get endpoint
                               {:headers {"X-Subscription-Token" subscription-key}
                                :query-params {:q query}})
          ;; Pull out results (keys are keywordized by the `true` argument)
          results (get-in (json/parse-string (:body response) true)
                          [:web :results])]
      ;; One [title url description] vector per search hit
      (mapv (juxt :title :url :description) results))))
9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | -------------------------------------------------------------------------------- /deeplearning_dl4j/README.md: -------------------------------------------------------------------------------- 1 | # deeplearning_dl4j_clj 2 | 3 | # Code for my book "Practical Artificial Intelligence Programming With Clojure" 4 | 5 | Documentation for this example is in the first chapter of my book that can be read for free online, or purchased: [https://leanpub.com/clojureai](https://leanpub.com/clojureai) 6 | 7 | Please also visit my website [https://markwatson.com](https://markwatson.com). 8 | 9 | ## Running the Example 10 | 11 | lein run 12 | 13 | ## License 14 | 15 | Copyright © 2021-2023 Mark Watson 16 | 17 | This program and the accompanying materials are made available under the 18 | terms of the Eclipse Public License 2.0 which is available at 19 | http://www.eclipse.org/legal/epl-2.0. 20 | 21 | This Source Code may also be made available under the following Secondary 22 | Licenses when the conditions for such availability set forth in the Eclipse 23 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 24 | the Free Software Foundation, either version 2 of the License, or (at your 25 | option) any later version, with the GNU Classpath Exception which is available 26 | at https://www.gnu.org/software/classpath/license.html. 
;; Leiningen build definition for the DL4J example.
;; All three DL4J/ND4J artifacts are pinned to the same version, 1.0.0-M2.1.
(defproject deeplearning_dl4j_clj "0.1.2-SNAPSHOT"
  :description "DL4J example"
  :url "https://markwatson.com"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url "https://www.eclipse.org/legal/epl-2.0/"}
  :dependencies [[org.clojure/clojure "1.11.1"]
                 [org.deeplearning4j/deeplearning4j-datasets "1.0.0-M2.1"]
                 [org.deeplearning4j/deeplearning4j-core "1.0.0-M2.1"]
                 [org.nd4j/nd4j-native "1.0.0-M2.1"]]
  ;; The main namespace is not AOT-compiled except in the uberjar profile.
  :main ^:skip-aot deeplearning-dl4j-clj.wisconsin-data
  :target-path "target/%s"
  :profiles {:uberjar {:aot :all
                       :jvm-opts ["-Dclojure.compiler.direct-linking=true"]}})
(defn -main
  "Using DL4J with Wisconsin data.

   Trains a network with one dense hidden layer (numInputs -> numHidden,
   TANH) and a softmax output layer (numHidden -> numClasses) on
   data/training.csv for 10 epochs, then prints the target and the predicted
   output values for every row of data/testing.csv. Both files are read
   relative to the working directory; the label is taken from column
   labelIndex. Command line arguments are ignored."
  [& args]
  (let [recordReader (CSVRecordReader.)
        _ (.initialize recordReader
                       (FileSplit. (File. "data/" "training.csv")))
        trainIter (RecordReaderDataSetIterator.
                   recordReader batchSize labelIndex numClasses)
        recordReaderTest (CSVRecordReader.)
        _ (.initialize recordReaderTest
                       (FileSplit. (File. "data/" "testing.csv")))
        testIter (RecordReaderDataSetIterator.
                  recordReaderTest batchSize labelIndex numClasses)
        conf (->
              (NeuralNetConfiguration$Builder.)
              (.seed initial-seed)
              (.activation Activation/TANH)
              (.weightInit WeightInit/XAVIER)
              (.updater (Sgd. 0.1))
              (.l2 1e-4)
              (.list)
              (.layer
               0,
               (-> (DenseLayer$Builder.)
                   (.nIn numInputs)
                   (.nOut numHidden)
                   (.build)))
              (.layer
               1,
               (-> (OutputLayer$Builder.
                    LossFunctions$LossFunction/MCXENT)
                   (.nIn numHidden)
                   (.nOut numClasses)
                   (.activation Activation/SOFTMAX)
                   (.build)))
              (.build))
        model (MultiLayerNetwork. conf)
        ;; listener constructed with 100 as its print-iterations argument
        score-listener (ScoreIterationListener. 100)]
    (.init model)
    (.setListeners model (list score-listener))
    (.fit model trainIter 10)
    (while (.hasNext testIter)
      (let [ds (.next testIter)
            features (.getFeatures ds)
            labels (.getLabels ds)
            predicted (.output model features false)]
        ;; Labels and predictions are read with a linear index, two values
        ;; (one per class) for each sample. The upper bound comes from the
        ;; batch itself instead of a hard-coded 46, so any number of test
        ;; rows and any number of batches is handled.
        (doseq [i (range 0 (.length labels) 2)]
          (println
           "target: [" (.getDouble labels i)
           (.getDouble labels (+ i 1)) "]"
           "predicted : ["
           (format "%1.2f"
                   (.getDouble predicted i))
           (format "%1.2f"
                   (.getDouble predicted
                               (+ i 1))) "]"))))))
DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial content 12 | Distributed under this Agreement, and 13 | 14 | b) in the case of each subsequent Contributor: 15 | i) changes to the Program, and 16 | ii) additions to the Program; 17 | where such changes and/or additions to the Program originate from 18 | and are Distributed by that particular Contributor. A Contribution 19 | "originates" from a Contributor if it was added to the Program by 20 | such Contributor itself or anyone acting on such Contributor's behalf. 21 | Contributions do not include changes or additions to the Program that 22 | are not Modified Works. 23 | 24 | "Contributor" means any person or entity that Distributes the Program. 25 | 26 | "Licensed Patents" mean patent claims licensable by a Contributor which 27 | are necessarily infringed by the use or sale of its Contribution alone 28 | or when combined with the Program. 29 | 30 | "Program" means the Contributions Distributed in accordance with this 31 | Agreement. 32 | 33 | "Recipient" means anyone who receives the Program under this Agreement 34 | or any Secondary License (as applicable), including Contributors. 35 | 36 | "Derivative Works" shall mean any work, whether in Source Code or other 37 | form, that is based on (or derived from) the Program and for which the 38 | editorial revisions, annotations, elaborations, or other modifications 39 | represent, as a whole, an original work of authorship. 40 | 41 | "Modified Works" shall mean any work in Source Code or other form that 42 | results from an addition to, deletion from, or modification of the 43 | contents of the Program, including, for purposes of clarity any new file 44 | in Source Code form that contains any contents of the Program. 
Modified 45 | Works shall not include works that contain only declarations, 46 | interfaces, types, classes, structures, or files of the Program solely 47 | in each case in order to link to, bind by name, or subclass the Program 48 | or Modified Works thereof. 49 | 50 | "Distribute" means the acts of a) distributing or b) making available 51 | in any manner that enables the transfer of a copy. 52 | 53 | "Source Code" means the form of a Program preferred for making 54 | modifications, including but not limited to software source code, 55 | documentation source, and configuration files. 56 | 57 | "Secondary License" means either the GNU General Public License, 58 | Version 2.0, or any later versions of that license, including any 59 | exceptions or additional permissions as identified by the initial 60 | Contributor. 61 | 62 | 2. GRANT OF RIGHTS 63 | 64 | a) Subject to the terms of this Agreement, each Contributor hereby 65 | grants Recipient a non-exclusive, worldwide, royalty-free copyright 66 | license to reproduce, prepare Derivative Works of, publicly display, 67 | publicly perform, Distribute and sublicense the Contribution of such 68 | Contributor, if any, and such Derivative Works. 69 | 70 | b) Subject to the terms of this Agreement, each Contributor hereby 71 | grants Recipient a non-exclusive, worldwide, royalty-free patent 72 | license under Licensed Patents to make, use, sell, offer to sell, 73 | import and otherwise transfer the Contribution of such Contributor, 74 | if any, in Source Code or other form. This patent license shall 75 | apply to the combination of the Contribution and the Program if, at 76 | the time the Contribution is added by the Contributor, such addition 77 | of the Contribution causes such combination to be covered by the 78 | Licensed Patents. The patent license shall not apply to any other 79 | combinations which include the Contribution. No hardware per se is 80 | licensed hereunder. 
81 | 82 | c) Recipient understands that although each Contributor grants the 83 | licenses to its Contributions set forth herein, no assurances are 84 | provided by any Contributor that the Program does not infringe the 85 | patent or other intellectual property rights of any other entity. 86 | Each Contributor disclaims any liability to Recipient for claims 87 | brought by any other entity based on infringement of intellectual 88 | property rights or otherwise. As a condition to exercising the 89 | rights and licenses granted hereunder, each Recipient hereby 90 | assumes sole responsibility to secure any other intellectual 91 | property rights needed, if any. For example, if a third party 92 | patent license is required to allow Recipient to Distribute the 93 | Program, it is Recipient's responsibility to acquire that license 94 | before distributing the Program. 95 | 96 | d) Each Contributor represents that to its knowledge it has 97 | sufficient copyright rights in its Contribution, if any, to grant 98 | the copyright license set forth in this Agreement. 99 | 100 | e) Notwithstanding the terms of any Secondary License, no 101 | Contributor makes additional grants to any Recipient (other than 102 | those set forth in this Agreement) as a result of such Recipient's 103 | receipt of the Program under the terms of a Secondary License 104 | (if permitted under the terms of Section 3). 105 | 106 | 3. 
REQUIREMENTS 107 | 108 | 3.1 If a Contributor Distributes the Program in any form, then: 109 | 110 | a) the Program must also be made available as Source Code, in 111 | accordance with section 3.2, and the Contributor must accompany 112 | the Program with a statement that the Source Code for the Program 113 | is available under this Agreement, and informs Recipients how to 114 | obtain it in a reasonable manner on or through a medium customarily 115 | used for software exchange; and 116 | 117 | b) the Contributor may Distribute the Program under a license 118 | different than this Agreement, provided that such license: 119 | i) effectively disclaims on behalf of all other Contributors all 120 | warranties and conditions, express and implied, including 121 | warranties or conditions of title and non-infringement, and 122 | implied warranties or conditions of merchantability and fitness 123 | for a particular purpose; 124 | 125 | ii) effectively excludes on behalf of all other Contributors all 126 | liability for damages, including direct, indirect, special, 127 | incidental and consequential damages, such as lost profits; 128 | 129 | iii) does not attempt to limit or alter the recipients' rights 130 | in the Source Code under section 3.2; and 131 | 132 | iv) requires any subsequent distribution of the Program by any 133 | party to be under a license that satisfies the requirements 134 | of this section 3. 
135 | 136 | 3.2 When the Program is Distributed as Source Code: 137 | 138 | a) it must be made available under this Agreement, or if the 139 | Program (i) is combined with other material in a separate file or 140 | files made available under a Secondary License, and (ii) the initial 141 | Contributor attached to the Source Code the notice described in 142 | Exhibit A of this Agreement, then the Program may be made available 143 | under the terms of such Secondary Licenses, and 144 | 145 | b) a copy of this Agreement must be included with each copy of 146 | the Program. 147 | 148 | 3.3 Contributors may not remove or alter any copyright, patent, 149 | trademark, attribution notices, disclaimers of warranty, or limitations 150 | of liability ("notices") contained within the Program from any copy of 151 | the Program which they Distribute, provided that Contributors may add 152 | their own appropriate notices. 153 | 154 | 4. COMMERCIAL DISTRIBUTION 155 | 156 | Commercial distributors of software may accept certain responsibilities 157 | with respect to end users, business partners and the like. While this 158 | license is intended to facilitate the commercial use of the Program, 159 | the Contributor who includes the Program in a commercial product 160 | offering should do so in a manner which does not create potential 161 | liability for other Contributors. Therefore, if a Contributor includes 162 | the Program in a commercial product offering, such Contributor 163 | ("Commercial Contributor") hereby agrees to defend and indemnify every 164 | other Contributor ("Indemnified Contributor") against any losses, 165 | damages and costs (collectively "Losses") arising from claims, lawsuits 166 | and other legal actions brought by a third party against the Indemnified 167 | Contributor to the extent caused by the acts or omissions of such 168 | Commercial Contributor in connection with its distribution of the Program 169 | in a commercial product offering. 
The obligations in this section do not 170 | apply to any claims or Losses relating to any actual or alleged 171 | intellectual property infringement. In order to qualify, an Indemnified 172 | Contributor must: a) promptly notify the Commercial Contributor in 173 | writing of such claim, and b) allow the Commercial Contributor to control, 174 | and cooperate with the Commercial Contributor in, the defense and any 175 | related settlement negotiations. The Indemnified Contributor may 176 | participate in any such claim at its own expense. 177 | 178 | For example, a Contributor might include the Program in a commercial 179 | product offering, Product X. That Contributor is then a Commercial 180 | Contributor. If that Commercial Contributor then makes performance 181 | claims, or offers warranties related to Product X, those performance 182 | claims and warranties are such Commercial Contributor's responsibility 183 | alone. Under this section, the Commercial Contributor would have to 184 | defend claims against the other Contributors related to those performance 185 | claims and warranties, and if a court requires any other Contributor to 186 | pay any damages as a result, the Commercial Contributor must pay 187 | those damages. 188 | 189 | 5. NO WARRANTY 190 | 191 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 192 | PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" 193 | BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 194 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF 195 | TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR 196 | PURPOSE. 
Each Recipient is solely responsible for determining the 197 | appropriateness of using and distributing the Program and assumes all 198 | risks associated with its exercise of rights under this Agreement, 199 | including but not limited to the risks and costs of program errors, 200 | compliance with applicable laws, damage to or loss of data, programs 201 | or equipment, and unavailability or interruption of operations. 202 | 203 | 6. DISCLAIMER OF LIABILITY 204 | 205 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 206 | PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS 207 | SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 208 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST 209 | PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 210 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 211 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 212 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE 213 | POSSIBILITY OF SUCH DAMAGES. 214 | 215 | 7. GENERAL 216 | 217 | If any provision of this Agreement is invalid or unenforceable under 218 | applicable law, it shall not affect the validity or enforceability of 219 | the remainder of the terms of this Agreement, and without further 220 | action by the parties hereto, such provision shall be reformed to the 221 | minimum extent necessary to make such provision valid and enforceable. 222 | 223 | If Recipient institutes patent litigation against any entity 224 | (including a cross-claim or counterclaim in a lawsuit) alleging that the 225 | Program itself (excluding combinations of the Program with other software 226 | or hardware) infringes such Recipient's patent(s), then such Recipient's 227 | rights granted under Section 2(b) shall terminate as of the date such 228 | litigation is filed. 
229 | 230 | All Recipient's rights under this Agreement shall terminate if it 231 | fails to comply with any of the material terms or conditions of this 232 | Agreement and does not cure such failure in a reasonable period of 233 | time after becoming aware of such noncompliance. If all Recipient's 234 | rights under this Agreement terminate, Recipient agrees to cease use 235 | and distribution of the Program as soon as reasonably practicable. 236 | However, Recipient's obligations under this Agreement and any licenses 237 | granted by Recipient relating to the Program shall continue and survive. 238 | 239 | Everyone is permitted to copy and distribute copies of this Agreement, 240 | but in order to avoid inconsistency the Agreement is copyrighted and 241 | may only be modified in the following manner. The Agreement Steward 242 | reserves the right to publish new versions (including revisions) of 243 | this Agreement from time to time. No one other than the Agreement 244 | Steward has the right to modify this Agreement. The Eclipse Foundation 245 | is the initial Agreement Steward. The Eclipse Foundation may assign the 246 | responsibility to serve as the Agreement Steward to a suitable separate 247 | entity. Each new version of the Agreement will be given a distinguishing 248 | version number. The Program (including Contributions) may always be 249 | Distributed subject to the version of the Agreement under which it was 250 | received. In addition, after a new version of the Agreement is published, 251 | Contributor may elect to Distribute the Program (including its 252 | Contributions) under the new version. 253 | 254 | Except as expressly stated in Sections 2(a) and 2(b) above, Recipient 255 | receives no rights or licenses to the intellectual property of any 256 | Contributor under this Agreement, whether expressly, by implication, 257 | estoppel or otherwise. All rights in the Program not expressly granted 258 | under this Agreement are reserved. 
Nothing in this Agreement is intended 259 | to be enforceable by any entity that is not a Contributor or Recipient. 260 | No third-party beneficiary rights are created under this Agreement. 261 | 262 | Exhibit A - Form of Secondary Licenses Notice 263 | 264 | "This Source Code may also be made available under the following 265 | Secondary Licenses when the conditions for such availability set forth 266 | in the Eclipse Public License, v. 2.0 are satisfied: GNU General Public 267 | License as published by the Free Software Foundation, either version 2 268 | of the License, or (at your option) any later version, with the GNU 269 | Classpath Exception which is available at 270 | https://www.gnu.org/software/classpath/license.html." 271 | 272 | Simply including a copy of this Agreement, including this Exhibit A 273 | is not sufficient to license the Source Code under Secondary Licenses. 274 | 275 | If it is not possible or desirable to put the notice in a particular 276 | file, then You may include the notice in a location (such as a LICENSE 277 | file in a relevant directory) where a recipient would be likely to 278 | look for such a notice. 279 | 280 | You may add additional accurate notices of copyright ownership. 281 | -------------------------------------------------------------------------------- /docs_qa/README.md: -------------------------------------------------------------------------------- 1 | # docs_qa 2 | 3 | # docs_qa: a Clojure app for Documents Question Answering Using OpenAI GPT3 APIs and a Local Embeddings Vector Database 4 | 5 | This project is inspired by the Python LangChain and LlamaIndex projects, with the parts I need written from scratch in Clojure.
I wrote a Python book "LangChain and LlamaIndex Projects Lab Book: Hooking Large Language Models Up to the Real World 6 | Using GPT-3, ChatGPT, and Hugging Face Models in Applications" in March 2023: [https://leanpub.com/langchain](https://leanpub.com/langchain) that you might also be interested in. 7 | 8 | ## Installation 9 | 10 | In the directory ../openai_api run: 11 | 12 | lein install 13 | 14 | Then in this directory just run: lein test 15 | 16 | ## Usage 17 | 18 | lein test 19 | 20 | ## License 21 | 22 | Copyright © 2023-2025 Mark Watson 23 | 24 | This program and the accompanying materials are made available under the 25 | terms of the Eclipse Public License 2.0 which is available at 26 | http://www.eclipse.org/legal/epl-2.0. 27 | 28 | This Source Code may also be made available under the following Secondary 29 | Licenses when the conditions for such availability set forth in the Eclipse 30 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 31 | the Free Software Foundation, either version 2 of the License, or (at your 32 | option) any later version, with the GNU Classpath Exception which is available 33 | at https://www.gnu.org/software/classpath/license.html. 34 | -------------------------------------------------------------------------------- /docs_qa/data/chemistry.txt: -------------------------------------------------------------------------------- 1 | Amyl alcohol is an organic compound with the formula C 5 H 12 O. All eight isomers of amyl alcohol are known. The most important is isobutyl carbinol, this being the chief constituent of fermentation amyl alcohol, and consequently a constituent of fusel oil. It can be separated from fusel oil by shaking with strong brine , separating the oily layer from the brine layer and it, the portion boiling between 125 and 140 C. being collected. 
For further purification it may be shaken with hot lime water, the oily layer separated, dried with calcium chloride and fractionated, the fraction boiling between 128 and 132 C only being collected. 2 | The 1730 definition of the word "chemistry", as used by Georg Ernst Stahl, meant the art of resolving mixed, compound, or aggregate bodies into their principles; and of composing such bodies from those principles. In 1837, Jean-Baptiste Dumas considered the word "chemistry" to refer to the science concerned with the laws and effects of molecular forces.[16] This definition further evolved until, in 1947, it came to mean the science of substances: their structure, their properties, and the reactions that change them into other substances - a characterization accepted by Linus Pauling.[17] More recently, in 1998, the definition of "chemistry" was broadened to mean the study of matter and the changes it undergoes, as phrased by Professor Raymond Chang. 3 | The current model of atomic structure is the quantum mechanical model.[36] Traditional chemistry starts with the study of elementary particles, atoms, molecules,[37] substances, metals, crystals and other aggregates of matter. This matter can be studied in solid, liquid, or gas states, in isolation or in combination. The interactions, reactions and transformations that are studied in chemistry are usually the result of interactions between atoms, leading to rearrangements of the chemical bonds which hold atoms together. Such behaviors are studied in a chemistry laboratory. 4 | 5 | The chemistry laboratory stereotypically uses various forms of laboratory glassware. However glassware is not central to chemistry, and a great deal of experimental (as well as applied/industrial) chemistry is done without it. 6 | The transfer of energy from one chemical substance to another depends on the size of energy quanta emitted from one substance. 
However, heat energy is often transferred more easily from almost any substance to another because the phonons responsible for vibrational and rotational energy levels in a substance have much less energy than photons invoked for the electronic energy transfer. Thus, because vibrational and rotational energy levels are more closely spaced than electronic energy levels, heat is more easily transferred between substances relative to light or other forms of electronic energy. For example, ultraviolet electromagnetic radiation is not transferred with as much efficacy from one substance to another as thermal or electrical energy. 7 | -------------------------------------------------------------------------------- /docs_qa/data/economics.txt: -------------------------------------------------------------------------------- 1 | The Austrian School (also known as the Vienna School or the Psychological School ) is a Schools of economic thought|school of economic thought that emphasizes the spontaneous organizing power of the price mechanism. Austrians hold that the complexity of subjective human choices makes mathematical modelling of the evolving market extremely difficult (or Undecidable and advocate a "laissez faire" approach to the economy. Austrian School economists advocate the strict enforcement of voluntary contractual agreements between economic agents, and hold that commercial transactions should be subject to the smallest possible imposition of forces they consider to be (in particular the smallest possible amount of government intervention). The Austrian School derives its name from its predominantly Austrian founders and early supporters, including Carl Menger, Eugen von Böhm-Bawerk and Ludwig von Mises. 2 | 3 | Economics is the social science that analyzes the production, distribution, and consumption of goods and services. 
Political economy was the earlier name for the subject, but economists in the late 19th century suggested "economics" as a shorter term for "economic science" that also avoided a narrow political-interest connotation and as similar in form to "mathematics", "ethics", and so forth.[2] 4 | 5 | A focus of the subject is how economic agents behave or interact and how economies work. Consistent with this, a primary textbook distinction is between microeconomics and macroeconomics. Microeconomics examines the behavior of basic elements in the economy, including individual agents (such as households and firms or as buyers and sellers) and markets, and their interactions. Macroeconomics analyzes the entire economy and issues affecting it, including unemployment, inflation, economic growth, and monetary and fiscal policy. 6 | 7 | The professionalization of economics, reflected in the growth of graduate programs on the subject, has been described as "the main change in economics since around 1900".[93] Most major universities and many colleges have a major, school, or department in which academic degrees are awarded in the subject, whether in the liberal arts, business, or for professional study; see Master of Economics. 8 | 9 | 10 | Economics is the social science that studies the behavior of individuals, households, and organizations (called economic actors, players, or agents), when they manage or use scarce resources, which have alternative uses, to achieve desired ends. Agents are assumed to act rationally, have multiple desirable ends in sight, limited resources to obtain these ends, a set of stable preferences, a definite overall guiding objective, and the capability of making a choice. There exists an economic problem, subject to study by economic science, when a decision (choice) is made by one or more resource-controlling players to attain the best possible outcome under bounded rational conditions. 
In other words, resource-controlling agents maximize value subject to the constraints imposed by the information the agents have, their cognitive limitations, and the finite amount of time they have to make and execute a decision. Economic science centers on the activities of the economic agents that comprise society.[1] They are the focus of economic analysis.[2] 11 | 12 | The traditional concern of economic analysis is to gain an understanding of the processes that govern the production, distribution and consumption of goods and services in an exchange economy.[3] An approach to understanding these processes, through the study of agent behavior under scarcity, may go as s: 13 | -------------------------------------------------------------------------------- /docs_qa/data/health.txt: -------------------------------------------------------------------------------- 1 | 2 | which requires that you sit at a desk all day. ; If you hate to talk 3 | politics, don't associate with people who love to talk politics, etc. Learn to live one day at a time. Every day, do something you really enjoy. Add an ounce of love to everything you do. Take a hot bath or shower (or a cool one in summertime) to relieve tension. Do something for somebody else. Focus on understanding rather than on being understood; on loving rather than on being loved. Do something that will improve your appearance. ; Looking better can help you feel better. Schedule a realistic day. ; Avoid the tendency to schedule back-to-back appointments; allow time between appointments for a breathing spell. Become more flexible. ; Some things are worth not doing perfectly and 4 | some issues are fine to compromise upon. Eliminate destructive self-talking 5 | 6 | I also felt they protected me from the hard road by interposing a layer of air between the sole of my foot and the pavement. So why was I sidelined with a heel injury for over two s? I listened to the manufacturer and changed my runners every 400 miles. 
Come to think of it, why do I see so many runners with lower extremity injuries in my office? The traditional answer to these questions has always been overuse often compounded by an underlying mechanical abnormality such as over-pronation or flat-feet. The treatment, along with modification of training, physiotherapy, stretching etc. has always included a close look at the runner's footwear, often with recommendations about motion control, stability, cushioning, orthotics or custom molded insoles. 7 | I do not recommend that you run your next half-marathon barefoot. But certainly, I predict that sooner or later, changes will come about in both shoe design and training. From the medical establishment's point of view, the prevention and treatment of running injuries must change to incorporate the concepts outlined above. In fact I view the ideas I've presented here as a major paradigm shift in sports medicine, the likes of which I have not seen in the last fifteen years. Of course, the major shoe companies have to own up and start introducing better shoes into their lines. 8 | 9 | adaptive immunity: The ability of the body to learn to fight specific infections after being exposed to the germs that cause them. 10 | 11 | addiction: Loss of control over indulging in a substance or performing an action or behavior, and continued craving for it despite negative consequences. 12 | 13 | 14 | adenosine triphosphate: An energy-storing molecule that is found in all human cells. Usually abbreviated as ATP. 15 | 16 | adequate intake: An of the amount of a nutrient needed by healthy people. The Adequate Intake is used when there isn’t enough information to set a recommended dietary allowance (RDA). 17 | 18 | agoraphobia: Fear and avoidance of public places and open spaces. 19 | 20 | amnesia: Unusual memory loss or forgetfulness. 21 | 22 | amputation: The surgical removal of a limb or other body part. 23 | 24 | anaerobic: Any process that doesn’t require oxygen. 
Often refers to a form of short, high intensity exercise, known as anaerobic exercise. 25 | 26 | anaerobic exercise: Exercise that improves the efficiency of energy-producing systems that do not rely on oxygen. Examples include sprinting and weight lifting. 27 | 28 | 29 | dry eye: Stinging, burning, or irritation that occurs when the eye doesn’t produce enough moisture. 30 | 31 | duct: A tube or vessel in the body which carries the secretion of a gland; Secretion examples are tears, breast milk, etc. 32 | 33 | 34 | upper airway resistance syndrome: Inhalation that requires undue extra exertion; this extra work may cause insomnia and daytime sleepiness. 35 | 36 | urea: A waste product of protein digestion and metabolism. 37 | 38 | ureter: The tube that connects each kidney to the bladder. 39 | 40 | urethra: The tube leading from the bladder through which urine is carried from the body. 41 | -------------------------------------------------------------------------------- /docs_qa/data/sports.txt: -------------------------------------------------------------------------------- 1 | Sport is generally recognised as activities based in physical athleticism or physical dexterity. Sports are usually governed by rules to ensure fair competition and consistent adjudication of the winner. 2 | 3 | "Sport" comes from the Old French desport meaning "leisure", with the oldest definition in English from around 1300 being "anything humans find amusing or entertaining". 4 | 5 | Other bodies advocate widening the definition of sport to include all physical activity and exercise. For instance, the Council of Europe include all forms of physical exercise, including those completed just for fun. 
6 | 7 | -------------------------------------------------------------------------------- /docs_qa/doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to docs_qa 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /docs_qa/project.clj: -------------------------------------------------------------------------------- 1 | (defproject docs_qa "0.1.0-SNAPSHOT" 2 | :description "FIXME: write description" 3 | :url "http://example.com/FIXME" 4 | :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" 5 | :url "https://www.eclipse.org/legal/epl-2.0/"} 6 | :dependencies [[org.clojure/clojure "1.10.1"] 7 | [org.clojure/java.jdbc "0.7.12"] 8 | [org.clojars.cloggo/sqlite "0.1.1"] 9 | [openai_api "0.1.0-SNAPSHOT"]] 10 | :main ^:skip-aot docs-qa.core 11 | :target-path "target/%s" 12 | :profiles {:uberjar {:aot :all 13 | :jvm-opts ["-Dclojure.compiler.direct-linking=true"]}}) 14 | -------------------------------------------------------------------------------- /docs_qa/src/docs_qa/core.clj: -------------------------------------------------------------------------------- 1 | (ns docs-qa.core 2 | (:require [clojure.java.jdbc :as jdbc] 3 | [openai-api.core :refer :all] 4 | [docs-qa.vectordb :refer :all]) 5 | (:gen-class)) 6 | 7 | (defn best-vector-matches [query] 8 | (print "**count:" (count docs-qa.vectordb/embeddings-with-chunk-texts)) 9 | (clojure.string/join 10 | " ." 
11 | (let [query-embedding (openai-api.core/embeddings query)] 12 | (map 13 | second 14 | (filter 15 | (fn [emb-text-pair] 16 | (let [emb (first emb-text-pair) 17 | text (second emb-text-pair)] 18 | (> (openai-api.core/dot-product 19 | query-embedding 20 | emb) 21 | 0.79))) 22 | docs-qa.vectordb/embeddings-with-chunk-texts))))) 23 | 24 | (defn answer-prompt [prompt] 25 | (openai-api.core/answer-question 26 | prompt)) 27 | 28 | (defn -main 29 | [] 30 | (println "Loading text files in ./data/, performing chunking and getting OpenAI embeddings...") 31 | (answer-prompt " do nothiing ") 32 | (print "...done loading data and getting local embeddings.\n") 33 | (loop [] 34 | (println "Enter a query:") 35 | (let [input (read-line)] 36 | (if (empty? input) 37 | (println "Done.") 38 | (do 39 | (let [text (best-vector-matches input) 40 | prompt 41 | (clojure.string/replace 42 | (clojure.string/join 43 | "\n" 44 | ["With the following CONTEXT:\n\n" 45 | text 46 | "\n\nANSWER:\n\n" 47 | input]) 48 | #"\s+" " ")] 49 | (println "** PROMPT:" prompt) 50 | (println (answer-prompt prompt))) 51 | (recur)))))) 52 | -------------------------------------------------------------------------------- /docs_qa/src/docs_qa/vectordb.clj: -------------------------------------------------------------------------------- 1 | (ns docs-qa.vectordb) 2 | 3 | (defn string-to-floats [s] 4 | (map #(Float/parseFloat %) (clojure.string/split s #" "))) 5 | 6 | (defn truncate-string [s max-length] 7 | (if (< (count s) max-length) 8 | s 9 | (subs s 0 max-length))) 10 | 11 | (defn break-into-chunks [s chunk-size] 12 | (let [chunks (partition-all chunk-size s)] 13 | (map #(apply str %) chunks))) 14 | 15 | (defn document-texts-from_dir [dir-path] 16 | (map #(slurp %) (rest (file-seq (clojure.java.io/file dir-path))))) 17 | 18 | (defn document-texts-to-chunks [strings] 19 | (flatten 20 | (map #(break-into-chunks % 200) strings))) 21 | 22 | (def directory-path "data") 23 | 24 | (def doc-strings 
(document-texts-from_dir directory-path)) 25 | 26 | (def doc-chunks 27 | (filter #(> (count %) 40) (document-texts-to-chunks doc-strings))) 28 | 29 | (def chunk-embeddings 30 | (map #(openai-api.core/embeddings %) doc-chunks)) 31 | 32 | (def embeddings-with-chunk-texts 33 | (map vector chunk-embeddings doc-chunks)) 34 | 35 | ;;(clojure.pprint/pprint (first embeddings-with-chunk-texts)) 36 | -------------------------------------------------------------------------------- /docs_qa/test/docs_qa/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns docs-qa.core-test 2 | (:require [clojure.test :refer :all] 3 | [docs-qa.core :refer :all] 4 | [docs-qa.vectordb :refer :all] 5 | [openai-api.core :refer :all])) 6 | 7 | (deftest a-test 8 | (testing "FIXME, I fail." 9 | '(println 10 | (openai-api.core/answer-question 11 | "Where is the Valley of Kings?")) 12 | (println 13 | (docs-qa.core/best-vector-matches 14 | "What is Chemistry. How useful, really, are the sciences. 
Is Amyl alcohol is an organic compound?")) 15 | (is (= 0 0)))) 16 | -------------------------------------------------------------------------------- /gemini_api/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | .idea 5 | .lsp 6 | profiles.clj 7 | pom.xml 8 | pom.xml.asc 9 | *.jar 10 | *.class 11 | /.lein-* 12 | /.nrepl-port 13 | /.prepl-port 14 | .hgignore 15 | .hg/ 16 | 17 | .codegpt -------------------------------------------------------------------------------- /gemini_api/README.md: -------------------------------------------------------------------------------- 1 | # gemini_api 2 | 3 | A Clojure library for using the Gemini APIs 4 | 5 | Run the Code: 6 | (require '[gemini-api.core :as gemini]) 7 | 8 | (println (gemini/generate-content "Write a short poem about the ocean.")) 9 | (println (gemini/summarize "The quick brown fox jumps over the lazy dog.")) 10 | 11 | ## Usage 12 | 13 | lein test 14 | 15 | ## Code for my book "Practical Artificial Intelligence Programming With Clojure" 16 | 17 | You can read the book for this example code for free online: [https://leanpub.com/clojureai/read](https://leanpub.com/clojureai/read). 18 | 19 | If you would like to pay me for a copy of this book then please visit [https://leanpub.com/clojureai](https://leanpub.com/clojureai). 20 | 21 | Please visit my website [https://markwatson.com](https://markwatson.com). 22 | 23 | ## License 24 | 25 | Copyright © 2021-2024 Mark Watson 26 | 27 | This program and the accompanying materials are made available under the 28 | terms of the Eclipse Public License 2.0 which is available at 29 | http://www.eclipse.org/legal/epl-2.0. 30 | 31 | This Source Code may also be made available under the following Secondary 32 | Licenses when the conditions for such availability set forth in the Eclipse 33 | Public License, v.
2.0 are satisfied: GNU General Public License as published by 34 | the Free Software Foundation, either version 2 of the License, or (at your 35 | option) any later version, with the GNU Classpath Exception which is available 36 | at https://www.gnu.org/software/classpath/license.html. 37 | -------------------------------------------------------------------------------- /gemini_api/deps.edn: -------------------------------------------------------------------------------- 1 | {:deps {org.clojure/clojure {:mvn/version "1.11.1"} 2 | clj-http/clj-http {:mvn/version "3.12.1"} 3 | org.clojure/data.json {:mvn/version "2.5.0"}}} 4 | -------------------------------------------------------------------------------- /gemini_api/doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to gemini_api 2 | 3 | TODO -------------------------------------------------------------------------------- /gemini_api/project.clj: -------------------------------------------------------------------------------- 1 | (defproject gemini_api "0.1.0-SNAPSHOT" 2 | :description "FIXME: write description" 3 | :url "http://example.com/FIXME" 4 | :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" 5 | :url "https://www.eclipse.org/legal/epl-2.0/"} 6 | :dependencies [[org.clojure/clojure "1.11.1"] 7 | [clj-http "3.12.1"] 8 | [org.clojure/data.json "2.5.0"] 9 | ] 10 | :repl-options {:init-ns gemini-api.core}) 11 | -------------------------------------------------------------------------------- /gemini_api/src/gemini_api/core.clj: -------------------------------------------------------------------------------- 1 | (ns gemini-api.core 2 | (:require [clj-http.client :as client]) 3 | (:require [clojure.data.json :as json])) 4 | 5 | (def model "gemini-2.0-flash") ; or gemini-1.5-pro, etc. 
6 | 7 | (def google-api-key (System/getenv "GOOGLE_API_KEY")) ; Make sure to set this env variable 8 | 9 | (def base-url "https://generativelanguage.googleapis.com/v1beta/models") 10 | 11 | (defn generate-content "POST prompt to the generateContent endpoint of model; returns the text of the first part of the first candidate, or nil (after printing a diagnostic) when the request fails or no text is present." [prompt] 12 | (let [url (str base-url "/" model ":generateContent") 13 | headers {"Content-Type" "application/json" "x-goog-api-key" (str google-api-key)} ; the key travels in a header instead of the URL query string 14 | body {:contents [{:parts [{:text prompt}]}]}] 15 | (try 16 | (let [response (client/post url {:headers headers 17 | :body (json/write-str body) 18 | :content-type :json 19 | :accept :json}) 20 | _ (println "Raw response:" (:body response)) ; Debug print 21 | parsed-response (json/read-str (:body response) :key-fn keyword) 22 | candidates (:candidates parsed-response)] 23 | (if (seq candidates) 24 | (let [text (get-in (first candidates) [:content :parts 0 :text])] 25 | (if text 26 | text 27 | (do 28 | (println "No text found in response structure:" parsed-response) 29 | nil))) 30 | (do 31 | (println "No candidates found in response:" parsed-response) 32 | nil))) 33 | (catch Exception e 34 | (println "Error making request:" (.getMessage e)) 35 | (when-let [response-body (-> e ex-data :body)] 36 | (println "Error response body:" response-body)) 37 | nil)))) 38 | 39 | (defn summarize "Prefix text with a summarization instruction and return whatever generate-content returns for it." [text] 40 | (generate-content (str "Summarize the following text:\n\n" text))) 41 | -------------------------------------------------------------------------------- /gemini_api/test/gemini_api/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns gemini-api.core-test 2 | (:require [clojure.test :refer :all] 3 | [gemini-api.core :refer :all])) 4 | 5 | (def some-text 6 | "Jupiter is the fifth planet from the Sun and the largest in the Solar System. It is a gas giant with a mass one-thousandth that of the Sun, but two-and-a-half times that of all the other planets in the Solar System combined. 
Jupiter is one of the brightest objects visible to the naked eye in the night sky, and has been known to ancient civilizations since before recorded history. It is named after the Roman god Jupiter.[19] When viewed from Earth, Jupiter can be bright enough for its reflected light to cast visible shadows,[20] and is on average the third-brightest natural object in the night sky after the Moon and Venus.") 7 | 8 | (deftest completions-test 9 | (testing "gemini completions API" 10 | (let [results 11 | (gemini-api.core/generate-content "He walked to the river")] 12 | (println results) 13 | (is (= 0 0))))) 14 | 15 | (deftest summarize-test 16 | (testing "gemini summarize API" 17 | (let [results 18 | (gemini-api.core/summarize 19 | some-text)] 20 | (println results) 21 | (is (= 0 0))))) 22 | 23 | (deftest question-answering-test 24 | (testing "gemini question-answering API" 25 | (let [results 26 | (gemini-api.core/generate-content 27 | ;;"If it is not used for hair, a round brush is an example of what 1. hair brush 2. bathroom 3. art supplies 4. shower ?" 28 | "Where is the Valley of Kings?" 29 | ;"Where is San Francisco?" 30 | )] 31 | (println results) 32 | (is (= 0 0))))) 33 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | cache.db 15 | *.db 16 | 17 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 
3 | 4 | ## [Unreleased] 5 | ### Changed 6 | - Add a new arity to `make-widget-async` to provide a different widget shape. 7 | 8 | ## [0.1.1] - 2021-04-30 9 | ### Changed 10 | - Documentation on how to make the widgets. 11 | 12 | ### Removed 13 | - `make-widget-sync` - we're all async, all the time. 14 | 15 | ### Fixed 16 | - Fixed widget maker to keep working when daylight savings switches over. 17 | 18 | ## 0.1.0 - 2021-04-30 19 | ### Added 20 | - Files from the new template. 21 | - Widget maker public API - `make-widget-sync`. 22 | 23 | [Unreleased]: https://github.com/your-name/knowledge_graph_navigator_clj/compare/0.1.1...HEAD 24 | [0.1.1]: https://github.com/your-name/knowledge_graph_navigator_clj/compare/0.1.0...0.1.1 25 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/README.md: -------------------------------------------------------------------------------- 1 | # knowledge_graph_navigator_clj 2 | 3 | A Clojure library designed to automate collecting information 4 | from SPARQL endpoints. 5 | 6 | ## Usage 7 | 8 | Run the tests using: 9 | 10 | lein test 11 | 12 | or run a simple main demo program using: 13 | 14 | lein run 15 | 16 | ## Code for my book "Practical Artificial Intelligence Programming With Clojure" 17 | 18 | You can download PDF/ePub/MOBI versions of my book for free on my 19 | website [https://markwatson.com](https://markwatson.com), or if you would like to pay me for a copy then please visit [https://leanpub.com/clojureai](https://leanpub.com/clojureai). 20 | 21 | ## License 22 | 23 | Copyright © 2021 Mark Watson 24 | 25 | This program and the accompanying materials are made available under the 26 | terms of the Eclipse Public License 2.0 which is available at 27 | http://www.eclipse.org/legal/epl-2.0. 28 | 29 | This Source Code may also be made available under the following Secondary 30 | Licenses when the conditions for such availability set forth in the Eclipse 31 | Public License, v. 
2.0 are satisfied: GNU General Public License as published by 32 | the Free Software Foundation, either version 2 of the License, or (at your 33 | option) any later version, with the GNU Classpath Exception which is available 34 | at https://www.gnu.org/software/classpath/license.html. 35 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to knowledge_graph_navigator_clj 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/entities_by_name.sparql: -------------------------------------------------------------------------------- 1 | select distinct ?s ?comment where { 2 | ?s 3 | ""@en . 4 | ?s 5 | ?comment . 6 | FILTER (lang(?comment) = "en") . 7 | ?s 8 | 9 | . 10 | } -------------------------------------------------------------------------------- /knowledge_graph_navigator/project.clj: -------------------------------------------------------------------------------- 1 | (defproject knowledge_graph_navigator_clj "0.1.0-SNAPSHOT" 2 | :description "Knowledge Graph Navigator" 3 | :url "https://markwatson.com" 4 | :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" 5 | :url "https://www.eclipse.org/legal/epl-2.0/"} 6 | :source-paths ["src"] 7 | :java-source-paths ["src-java"] 8 | :javac-options ["-target" "1.8" "-source" "1.8"] 9 | :dependencies [[org.clojure/clojure "1.11.1"] 10 | [clj-http "3.12.3"] 11 | [com.cemerick/url "0.1.1"] 12 | [org.clojure/data.csv "1.0.0"] 13 | [org.clojure/data.json "1.0.0"] 14 | [org.clojure/math.combinatorics "0.1.6"] 15 | [org.apache.derby/derby "10.15.2.0"] 16 | [org.apache.derby/derbytools "10.15.2.0"] 17 | [org.apache.derby/derbyclient "10.15.2.0"] 18 | [org.apache.jena/apache-jena-libs "4.4.0" 
:extension "pom"]] 19 | :repl-options {:init-ns knowledge-graph-navigator-clj.kgn} 20 | :main ^:skip-aot knowledge-graph-navigator-clj.kgn) 21 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/relationships.sparql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?p { 2 | 3 | ?p 4 | . 5 | FILTER (!regex(str(?p),"wikiPage","i")) 6 | } LIMIT 5 7 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/src-java/main/java/com/markwatson/semanticweb/Cache.java: -------------------------------------------------------------------------------- 1 | package com.markwatson.semanticweb; 2 | 3 | import java.sql.*; 4 | 5 | import org.apache.commons.lang3.SerializationUtils; 6 | import org.apache.derby.client.*; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | import java.util.Properties; 10 | 11 | public class Cache { 12 | public Cache() throws SQLException, ClassNotFoundException { 13 | Properties props = new Properties(); 14 | String dbName = "./sparqlCache.db"; 15 | conn = DriverManager.getConnection(protocol + dbName 16 | + ";create=true", props); 17 | conn.setAutoCommit(true); 18 | try { 19 | Statement s = conn.createStatement(); 20 | s.execute("CREATE TABLE cache (query varchar(3000) PRIMARY KEY, result blob)"); 21 | System.out.println("Created table 'cache'"); 22 | } catch (Exception ex) { 23 | //System.out.println("Error (Cache()): " + ex.getMessage()); 24 | //System.out.println("Table 'cache' already exists"); 25 | } 26 | } 27 | public void saveQueryResultInCache (String query, byte [] result) { 28 | try { 29 | if (fetchResultFromCache(query) != null) { 30 | //System.out.println("Query is already in the cache"); 31 | return; 32 | } 33 | Statement s = conn.createStatement(); 34 | PreparedStatement ps = conn.prepareStatement( 35 | "insert into cache (query, result) values (?, ?)"); 36 | 
ps.setString(1, query); 37 | ps.setBytes(2, result); 38 | ps.executeUpdate(); 39 | } catch (Exception ex) { 40 | System.out.println("Error (saveQueryResultInCache): " + ex.getMessage()); 41 | } 42 | } 43 | 44 | public byte [] fetchResultFromCache (String query) { 45 | try { 46 | Statement s = conn.createStatement(); 47 | PreparedStatement ps = conn.prepareStatement( 48 | "select result from cache where query = ?"); 49 | ps.setString(1, query); 50 | ResultSet rs = ps.executeQuery(); 51 | if (!rs.next()) return null; 52 | return rs.getBytes(1); 53 | } catch (Exception ex) { 54 | System.out.println("Error (fetchResultFromCache): " + ex.getMessage()); 55 | return null; 56 | } 57 | } 58 | 59 | private Connection conn = null; 60 | private String protocol = "jdbc:derby:"; 61 | 62 | } 63 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/src-java/main/java/com/markwatson/semanticweb/JenaApis.java: -------------------------------------------------------------------------------- 1 | package com.markwatson.semanticweb; 2 | 3 | import org.apache.commons.lang3.SerializationUtils; 4 | import org.apache.jena.query.*; 5 | import org.apache.jena.rdf.model.*; 6 | import org.apache.jena.riot.RDFDataMgr; 7 | import org.apache.jena.riot.RDFFormat; 8 | 9 | import java.io.FileNotFoundException; 10 | import java.io.FileOutputStream; 11 | import java.io.IOException; 12 | import java.sql.Blob; 13 | import java.sql.SQLException; 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | import java.util.Scanner; 17 | 18 | public class JenaApis { 19 | 20 | public JenaApis() { 21 | //model = ModelFactory.createDefaultModel(); // use if OWL reasoning not required 22 | model = ModelFactory.createOntologyModel(); // use OWL reasoner 23 | } 24 | 25 | public Model model() { 26 | return model; 27 | } 28 | 29 | public void loadRdfFile(String fpath) { 30 | model.read(fpath); 31 | } 32 | 33 | public void saveModelToTurtleFormat(String 
outputPath) throws IOException { 34 | FileOutputStream fos = new FileOutputStream(outputPath); 35 | RDFDataMgr.write(fos, model, RDFFormat.TRIG_PRETTY); 36 | fos.close(); 37 | } 38 | public void saveModelToN3Format(String outputPath) throws IOException { 39 | FileOutputStream fos = new FileOutputStream(outputPath); 40 | RDFDataMgr.write(fos, model, RDFFormat.NTRIPLES); 41 | fos.close(); 42 | } 43 | 44 | public QueryResult query(String sparqlQuery) { 45 | Query query = QueryFactory.create(sparqlQuery); 46 | QueryExecution qexec = QueryExecutionFactory.create(query, model); 47 | ResultSet results = qexec.execSelect(); 48 | QueryResult qr = new QueryResult(results.getResultVars()); 49 | for (; results.hasNext(); ) { 50 | QuerySolution solution = results.nextSolution(); 51 | List newResultRow = new ArrayList(); 52 | for (String var : qr.variableList) { 53 | newResultRow.add(solution.get(var).toString()); 54 | } 55 | qr.rows.add(newResultRow); 56 | } 57 | return qr; 58 | } 59 | 60 | public QueryResult queryRemote(String service, String sparqlQuery) throws SQLException, ClassNotFoundException { 61 | if (cache == null) cache = new Cache(); 62 | byte [] b = cache.fetchResultFromCache(sparqlQuery); 63 | if (b != null) { 64 | //System.out.println("Found query in cache."); 65 | QueryResult l = SerializationUtils.deserialize(b); 66 | return l; 67 | } 68 | Query query = QueryFactory.create(sparqlQuery); 69 | QueryExecution qexec = QueryExecutionFactory.sparqlService(service, sparqlQuery); 70 | ResultSet results = qexec.execSelect(); 71 | QueryResult qr = new QueryResult(results.getResultVars()); 72 | for (; results.hasNext(); ) { 73 | QuerySolution solution = results.nextSolution(); 74 | List newResultRow = new ArrayList(); 75 | for (String var : qr.variableList) { 76 | newResultRow.add(solution.get(var).toString()); 77 | } 78 | qr.rows.add(newResultRow); 79 | } 80 | byte [] b3 = SerializationUtils.serialize(qr); 81 | cache.saveQueryResultInCache(sparqlQuery, b3); 82 | return 
qr; 83 | } 84 | 85 | private Cache cache = null; 86 | private Model model; 87 | 88 | public static void main(String[] args) { 89 | /* 90 | Execute using, for example: 91 | mvn exec:java -Dexec.mainClass="com.markwatson.semanticweb.JenaApis" \ 92 | -Dexec.args="data/news.n3" 93 | */ 94 | JenaApis ja = new JenaApis(); 95 | System.out.println(args.length); 96 | if (args.length == 0) { 97 | // no RDF input file names on command line so use a default file: 98 | ja.loadRdfFile("data/news.n3"); 99 | } else { 100 | for (String fpath : args) { 101 | ja.loadRdfFile(fpath); 102 | } 103 | } 104 | System.out.println("Multi-line queries are OK but don't use blank lines."); 105 | System.out.println("Enter a blank line to process query."); 106 | while (true) { 107 | System.out.println("Enter a SPARQL query:"); 108 | Scanner sc = new Scanner(System.in); 109 | StringBuilder sb = new StringBuilder(); 110 | while (sc.hasNextLine()) { //until no other inputs to proceed 111 | String s = sc.nextLine(); 112 | if (s.equals("quit") || s.equals("QUIT") || s.equals("exit") || s.equals("EXIT")) 113 | System.exit(0); 114 | if (s.length() < 1) break; 115 | sb.append(s); 116 | sb.append("\n"); 117 | } 118 | QueryResult qr = ja.query(sb.toString()); 119 | System.out.println(qr); 120 | } 121 | } 122 | } -------------------------------------------------------------------------------- /knowledge_graph_navigator/src-java/main/java/com/markwatson/semanticweb/QueryResult.java: -------------------------------------------------------------------------------- 1 | package com.markwatson.semanticweb; 2 | 3 | import java.io.Serializable; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | public class QueryResult implements Serializable { 8 | private QueryResult() { } 9 | public QueryResult(List variableList) { 10 | this.variableList = variableList; 11 | } 12 | public List variableList; 13 | public List> rows = new ArrayList(); 14 | public String toString() { 15 | StringBuilder sb = new 
StringBuilder("[QueryResult vars:" + variableList + "\nRows:\n"); 16 | for (List row : rows) { 17 | sb.append(" " + row + "\n"); 18 | } 19 | return sb.toString(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/src-java/main/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/src/knowledge_graph_navigator_clj/entities_by_name.clj: -------------------------------------------------------------------------------- 1 | (ns knowledge-graph-navigator-clj.entities-by-name 2 | (:require [knowledge-graph-navigator-clj.sparql :as sparql]) 3 | (:require [knowledge-graph-navigator-clj.sparql-utils :as utils]) 4 | (:require [clojure.pprint :as pp]) 5 | (:require clojure.string)) 6 | 7 | (defn dbpedia-get-entities-by-name [name dbpedia-type] 8 | ;(println "** dbpedia-get-entities-by-name: name=" name "dbpedia-type=" dbpedia-type) 9 | (let [sparql-query 10 | (utils/sparql_template 11 | "entities_by_name.sparql" 12 | {"" name "" dbpedia-type}) 13 | results (sparql/sparql-endpoint sparql-query)] 14 | ;(println "Generated SPARQL to get DBPedia entity URIs from a name:") 15 | (println sparql-query) 16 | ;(println "Results:") (pprint results) 17 | results)) 18 | 19 | (defn -main 20 | "test/dev entities by name" 21 | [& _] 22 | (println (dbpedia-get-entities-by-name "Steve Jobs" "")) 23 | (println (dbpedia-get-entities-by-name "Microsoft" "")) 24 | (pp/pprint (dbpedia-get-entities-by-name "California" "")) 25 | ) 26 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/src/knowledge_graph_navigator_clj/kgn.clj: 
-------------------------------------------------------------------------------- 1 | (ns knowledge-graph-navigator-clj.kgn 2 | (:require [knowledge-graph-navigator-clj.entities-by-name :as entity-name]) 3 | (:require [knowledge-graph-navigator-clj.relationships :as rel]) 4 | (:require [clojure.math.combinatorics :as combo]) 5 | (:require [clojure.pprint :as pp])) 6 | 7 | (def entity-map {:People "" 8 | :Organization "" 9 | :Place ""}) 10 | 11 | (defn kgn 12 | "Top level function for the Knowledge Graph Navigator library 13 | Inputs: a map with keys :People, :Place, and :Organization; each value is a list of entity names. Returns a map with keys :entity-summaries and :discovered-relationships." 14 | [input-entity-map] 15 | ;;(println "* kgn:" input-entity-map) 16 | (let [entities-summary-data 17 | (filter 18 | (fn [x] (> (count x) 1)) ;; get rid of empty SPARQL results 19 | (mapcat ;; flatten just top level 20 | identity 21 | (for [entity-key (keys input-entity-map)] 22 | (for [entity-name (input-entity-map entity-key)] 23 | (cons 24 | entity-name 25 | (second 26 | (entity-name/dbpedia-get-entities-by-name 27 | entity-name 28 | (entity-map entity-key)))))))) 29 | entity-uris (map second entities-summary-data) ;; second item of each summary, i.e. the value that follows the entity name 30 | combinations-by-2-of-entity-uris (combo/combinations entity-uris 2) 31 | discovered-relationships 32 | (filter 33 | (fn [x] (> (count x) 0)) 34 | (for [pair-of-uris combinations-by-2-of-entity-uris] 35 | (seq (rel/entity-results->relationship-links pair-of-uris))))] 36 | {:entity-summaries entities-summary-data 37 | :discovered-relationships discovered-relationships})) 38 | 39 | (defn -main 40 | "Main function for KGN example" 41 | [& _] 42 | (let [results (kgn {:People ["Bill Gates" "Steve Jobs" "Melinda Gates"] 43 | :Organization ["Microsoft"] 44 | :Place ["California"]})] 45 | (println " -- results:") (pp/pprint results))) 46 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/src/knowledge_graph_navigator_clj/relationships.clj: 
-------------------------------------------------------------------------------- 1 | (ns knowledge-graph-navigator-clj.relationships 2 | (:require [knowledge-graph-navigator-clj.sparql :as sparql]) ;; for non-cached 3 | (:require [knowledge-graph-navigator-clj.sparql-utils :as utils]) 4 | (:require [clojure.pprint :as pp]) 5 | (:require clojure.string)) 6 | 7 | (defn dbpedia-get-relationships [s-uri o-uri] 8 | (let [query 9 | (utils/sparql_template 10 | "relationships.sparql" 11 | {"" s-uri "" o-uri}) 12 | results (sparql/sparql-endpoint query)] 13 | (map 14 | (fn [u] (clojure.string/join "" ["<" u ">"])) 15 | (second results)))) ; discard SPARQL variable name p (?p) 16 | 17 | (defn entity-results->relationship-links 18 | "Collect DBPedia relationship triples for every ordered pair of distinct URIs. 19 | Input: a seq of URI strings without angle brackets. 20 | Returns a vector of unique [subject property object] triples whose elements are 21 | angle-bracketed URI strings." 22 | [uris-no-brackets] 23 | (let [uris (map 24 | (fn [u] (clojure.string/join "" ["<" u ">"])) 25 | uris-no-brackets)] 26 | ;; Iterating over ordered pairs already covers both directions, so each 27 | ;; (subject, object) pair needs exactly one query. 28 | (vec 29 | (distinct 30 | (for [e1 uris 31 | e2 uris 32 | :when (not (= e1 e2)) 33 | x (dbpedia-get-relationships e1 e2)] 34 | [e1 x e2]))))) 35 | 36 | (defn -main 37 | "dev/test entity relationships code" 38 | [& _] 39 | (println "Testing entity-results->relationship-links") 40 | (pp/pprint (entity-results->relationship-links ["http://dbpedia.org/resource/Bill_Gates" "http://dbpedia.org/resource/Microsoft"]))) 41 | 42 |
-------------------------------------------------------------------------------- /knowledge_graph_navigator/src/knowledge_graph_navigator_clj/sparql.clj: -------------------------------------------------------------------------------- 1 | (ns knowledge-graph-navigator-clj.sparql 2 | (:require [clj-http.client :as client]) 3 | (:require clojure.stacktrace) 4 | (:require [cemerick.url :refer (url-encode)]) 5 | (:require [clojure.data.csv :as csv]) 6 | (:require [semantic-web-jena-clj.core :as jena])) 7 | 8 | ;; Copied from https://github.com/mark-watson/clj-sparql 9 | 10 | (def USE-LOCAL-GRAPHDB false) 11 | (def USE-CACHING true) ;; use Jena wrapper 12 | 13 | (defn dbpedia [sparql-query] 14 | (let [q (str "https://dbpedia.org//sparql?output=csv&query=" (url-encode sparql-query)) 15 | response (client/get q) 16 | body (:body response)] 17 | (csv/read-csv body))) 18 | 19 | (defn- graphdb-helper [host port graph-name sparql-query] 20 | (let [q (str host ":" port "/repositories/" graph-name "?query=" (url-encode sparql-query)) 21 | response (client/get q) 22 | body (:body response)] 23 | (csv/read-csv body))) 24 | 25 | (defn graphdb 26 | ([graph-name sparql-query] (graphdb-helper "http://127.0.0.1" 7200 graph-name sparql-query)) 27 | ([host port graph-name sparql-query] (graphdb-helper host port graph-name sparql-query))) 28 | 29 | (defn sparql-endpoint [sparql-query] 30 | (try 31 | (if USE-LOCAL-GRAPHDB 32 | (graphdb "dbpedia" sparql-query) 33 | (if USE-CACHING 34 | (jena/query-dbpedia sparql-query) 35 | (dbpedia sparql-query))) 36 | (catch Exception e 37 | (do 38 | 
(println "WARNING: a SPARQL query failed:\n" sparql-query) 39 | (println (.getMessage e)) 40 | (clojure.stacktrace/print-stack-trace e) 41 | [])))) 42 | 43 | (defn -main 44 | "SPARQL example" 45 | [& _] 46 | (println (sparql-endpoint "select * { ?s ?p ?o } limit 10"))) 47 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/src/knowledge_graph_navigator_clj/sparql_utils.clj: -------------------------------------------------------------------------------- 1 | (ns knowledge-graph-navigator-clj.sparql-utils) 2 | 3 | (defn sparql_template 4 | "open SPARQL template file and perform variable substitutions" 5 | [template-fpath substitution-map] 6 | (let [template-as-string (slurp template-fpath)] 7 | (clojure.string/replace 8 | template-as-string 9 | (re-pattern 10 | ; create a regex pattern of quoted replacements separated by |: 11 | ; code derived from a stackoverflow example by user bmillare 12 | (apply 13 | str 14 | (interpose 15 | "|" 16 | (map 17 | (fn [x] (java.util.regex.Pattern/quote x)) 18 | (keys substitution-map))))) 19 | substitution-map))) 20 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/src/semantic_web_jena_clj/core.clj: -------------------------------------------------------------------------------- 1 | (ns semantic-web-jena-clj.core 2 | (:import (com.markwatson.semanticweb JenaApis Cache QueryResult))) 3 | 4 | (defn- get-jena-api-model "get a default model with OWL reasoning" [] 5 | (new JenaApis)) 6 | 7 | (defonce model (get-jena-api-model)) 8 | 9 | (defn- results->clj [results] 10 | (let [variable-list (seq (. results variableList)) 11 | bindings-list (seq (map seq (. results rows)))] 12 | (cons variable-list bindings-list))) 13 | 14 | (defn load-rdf-file [fpath] 15 | (. model loadRdfFile fpath)) 16 | 17 | (defn query "SPARQL query" [sparql-query] 18 | (results->clj (. 
model query sparql-query))) 19 | 20 | (defn query-remote "remote service like DBPedia, etc." [remote-service sparql-query] 21 | (results->clj (. model queryRemote remote-service sparql-query))) 22 | 23 | (defn query-dbpedia [sparql-query] 24 | (query-remote "https://dbpedia.org/sparql" sparql-query)) 25 | ;; (query-dbpedia "select * where { ?subject ?property ?object . } limit 10") 26 | 27 | (defn query-wikidata "run sparql-query against the Wikidata SPARQL endpoint via query-remote" [sparql-query] 28 | (query-remote "https://query.wikidata.org/bigdata/namespace/wdq/sparql" sparql-query)) 29 | ;; (query-dbpedia "select ?p where { ?p . } limit 3") 30 | -------------------------------------------------------------------------------- /knowledge_graph_navigator/test/knowledge_graph_navigator_clj/kgn_test.clj: -------------------------------------------------------------------------------- 1 | (ns knowledge-graph-navigator-clj.kgn-test 2 | (:require [clojure.test :as test] 3 | [knowledge-graph-navigator-clj.kgn :as kgn]) 4 | (:require [knowledge-graph-navigator-clj.relationships :as enitites]) 5 | (:require [clojure.pprint :as pp])) 6 | 7 | 8 | (test/deftest get-relations-test 9 | (test/testing "test dbpedia-get-relationships" 10 | (pp/pprint (enitites/dbpedia-get-relationships 11 | "" 12 | "")) 13 | (test/is (= 0 0)))) 14 | 15 | (test/deftest relations-test 16 | (test/testing "test entity-results->relationship-links" 17 | (pp/pprint (enitites/entity-results->relationship-links 18 | ["http://dbpedia.org/resource/Bill_Gates" 19 | "http://dbpedia.org/resource/Microsoft"])) 20 | (test/is (= 0 0)))) 21 | 22 | 23 | (test/deftest rtop-level-test 24 | (test/testing "Top level test" 25 | (let [results 26 | (kgn/kgn {:People ["Bill Gates" "Steve Jobs" "Melinda Gates"] 27 | :Organization ["Microsoft"] 28 | :Place ["California"]})] 29 | (println results) 30 | (test/is (= (count results) 2))))) 31 | 32 | -------------------------------------------------------------------------------- /llm_bosquet/.gitignore: 
-------------------------------------------------------------------------------- 1 | .cpcache 2 | .codegpt 3 | .clj-kondo 4 | -------------------------------------------------------------------------------- /llm_bosquet/Makefile: -------------------------------------------------------------------------------- 1 | test: 2 | clj -X:test 3 | 4 | -------------------------------------------------------------------------------- /llm_bosquet/README.md: -------------------------------------------------------------------------------- 1 | # Try the Clojure LLM library Bosquet 2 | 3 | Currently uses Bosquet's defaults: 4 | 5 | - Model: mistral-small 6 | - Local Ollama model hosting 7 | 8 | ## Example files 9 | 10 | project-root/ 11 | ├── deps.edn 12 | ├── src/ 13 | │ └── llm_bosquet/ 14 | │ └── core.clj 15 | └── test/ 16 | └── llm_bosquet/ 17 | └── core_test.clj 18 | 19 | ## Running the example 20 | 21 | Make sure you define a valid OpenAI API key (if using OpenAI): 22 | 23 | export OPENAI_API_KEY=sk-...... 24 | 25 | Run the tests: 26 | 27 | clj -X:test 28 | 29 | # THIS EXAMPLE IS A WORK IN PROGRESS - NOT YET IN BOOK! 
30 | 31 | -------------------------------------------------------------------------------- /llm_bosquet/config.edn: -------------------------------------------------------------------------------- 1 | {:llm 2 | {:ollama 3 | {:api-endpoint "http://localhost:11434/api" 4 | :chat-fn bosquet.llm.ollama/chat 5 | :complete-fn bosquet.llm.ollama/complete 6 | :embed-fn bosquet.llm.ollama/create-embedding}}} 7 | -------------------------------------------------------------------------------- /llm_bosquet/deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src"] 2 | 3 | :deps {org.clojure/clojure {:mvn/version "1.11.1"} 4 | io.github.zmedelis/bosquet {:mvn/version "2024.08.08"}} 5 | 6 | :aliases 7 | {:test 8 | {:extra-paths ["test"] 9 | :extra-deps {io.github.cognitect-labs/test-runner 10 | {:git/tag "v0.5.1" :git/sha "dfb30dd"}} 11 | :main-opts ["-m" "cognitect.test-runner"] 12 | :exec-fn cognitect.test-runner.api/test}}} -------------------------------------------------------------------------------- /llm_bosquet/secrets.edn: -------------------------------------------------------------------------------- 1 | {:ollama 2 | {:api-key "ollama-api-key-not-used"}} 3 | -------------------------------------------------------------------------------- /llm_bosquet/src/llm_bosquet/core.clj: -------------------------------------------------------------------------------- 1 | (ns llm-bosquet.core 2 | (:require [bosquet.llm.generator :refer [generate llm]] 3 | [bosquet.llm.wkk :as wkk])) 4 | 5 | ;;(defn openai-gpt3-5turbo-generate [prompt] 6 | ;; (generate prompt)) ;; defaults to OpenAI 7 | 8 | (defn ollama-generate [prompt] 9 | (generate {:question-answer "Question: {{question}} Answer: {{answer}}" 10 | :answer (llm :ollama wkk/model-params {:model :mistral-small :max-tokens 50}) 11 | :self-eval ["{{question-answer}}" 12 | "Is this a correct answer?" 
13 | "{{test}}"] 14 | :test (llm :ollama wkk/model-params {:model :mistral-small :max-tokens 50})} 15 | {:question prompt})) -------------------------------------------------------------------------------- /llm_bosquet/test/llm_bosquet/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns llm-bosquet.core-test 2 | (:require [clojure.test :refer :all] 3 | [llm-bosquet.core :refer [ollama-generate]])) 4 | 5 | (deftest test-ollama-generate 6 | (testing "ollama-generate function" 7 | (let [prompt "What is the distance from the Moon to Io?" 8 | response (ollama-generate prompt) 9 | answer (get-in response [:bosquet/completions :answer])] 10 | (println "\nComplete response: " response) 11 | (println "\nAnswer: ", answer)))) 12 | -------------------------------------------------------------------------------- /nlp_libpython/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | __pycache__ 15 | 16 | -------------------------------------------------------------------------------- /nlp_libpython/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 3 | 4 | ## [Unreleased] 5 | ### Changed 6 | - Add a new arity to `make-widget-async` to provide a different widget shape. 7 | 8 | ## [0.1.1] - 2021-05-01 9 | ### Changed 10 | - Documentation on how to make the widgets. 11 | 12 | ### Removed 13 | - `make-widget-sync` - we're all async, all the time. 14 | 15 | ### Fixed 16 | - Fixed widget maker to keep working when daylight savings switches over. 
17 | 18 | ## 0.1.0 - 2021-05-01 19 | ### Added 20 | - Files from the new template. 21 | - Widget maker public API - `make-widget-sync`. 22 | 23 | [Unreleased]: https://github.com/your-name/python_interop_deeplearning/compare/0.1.1...HEAD 24 | [0.1.1]: https://github.com/your-name/python_interop_deeplearning/compare/0.1.0...0.1.1 25 | -------------------------------------------------------------------------------- /nlp_libpython/INSTALL_MLW.txt: -------------------------------------------------------------------------------- 1 | On a fresh Ubuntu GCP instance: 2 | 3 | export CLOJURE_TOOLS_VERSION=1.10.1.507 4 | sudo apt-get -qq update 5 | sudo apt-get -qq -y install curl wget bzip2 openjdk-8-jdk-headless python3.8 libpython3.8 python3-pip 6 | sudo apt-get -qq -y install aptitude 7 | aptitude search python 8 | sudo apt-get -qq -y install curl wget bzip2 openjdk-8-jdk-headless python3.7 libpython3.7 python3-pip 9 | aptitude search openjdk 10 | sudo apt-get -qq -y install curl wget bzip2 openjdk-11-jdk-headless python3.7 libpython3.7 python3-pip 11 | curl -o install-clojure https://download.clojure.org/install/linux-install-${CLOJURE_TOOLS_VERSION}.sh 12 | chmod +x install-clojure 13 | sudo ./install-clojure 14 | mkdir bin 15 | cd bin 16 | wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein 17 | chmod +x lein 18 | cd 19 | vi .profile # edit to add: export PATH=$PATH:~/bin 20 | lein 21 | python3 -m pip install numpy nltk spacy 22 | rm install-clojure 23 | # install Carin Meier's excellent examples for using many Python libraries: 24 | git clone https://github.com/gigasquid/libpython-clj-examples.git 25 | sudo apt-get install git emacs-nox 26 | cd libpython-clj-examples/ 27 | lein repl 28 | python3 -m pip install -U pip setuptools wheel 29 | python3 -m pip install -U spacy 30 | python3 -m spacy download en_core_web_sm 31 | 32 | -------------------------------------------------------------------------------- /nlp_libpython/QA.py: 
def answer(query_text, context_text):
    """Answer one question against one context using the module-level pipeline.

    Packs the two arguments into the ``{"question": ..., "context": ...}``
    mapping, hands it to ``qa``, prints whatever ``qa`` returns to stdout and
    then returns that same object to the caller.
    """
    qa_input = {
        "question": query_text,
        "context": context_text,
    }
    result = qa(qa_input)
    print(result)
    return result
;; Leiningen build definition for the libpython-clj / spaCy example project.
(defproject python_interop_deeplearning "0.1.0-SNAPSHOT"
  :description "Example using libpython-clj with the Python spaCy NLP library"
  ;; NOTE(review): this URL points at the libpython-clj-examples repository,
  ;; not at this project -- confirm that is intended.
  :url "https://github.com/gigasquid/libpython-clj-examples"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url "https://www.eclipse.org/legal/epl-2.0/"}
  ;; JVM diagnostic/attach flags; presumably carried over from the
  ;; libpython-clj examples project -- confirm they are still needed.
  :jvm-opts ["-Djdk.attach.allowAttachSelf"
             "-XX:+UnlockDiagnosticVMOptions"
             "-XX:+DebugNonSafepoints"]
  ;; lein-tools-deps is configured to read the project-level deps.edn.
  ;; NOTE(review): no deps.edn is listed for nlp_libpython in the repository
  ;; tree -- confirm the plugin/middleware still resolves without it.
  :plugins [[lein-tools-deps "0.4.5"]]
  :middleware [lein-tools-deps.plugin/resolve-dependencies-with-deps-edn]
  :lein-tools-deps/config {:config-files [:project]
                           :resolve-aliases []}

  ;; NOTE(review): :mvn/repos is deps.edn vocabulary; Leiningen's own key is
  ;; :repositories -- confirm this entry has any effect in project.clj.
  :mvn/repos {"central" {:url "https://repo1.maven.org/maven2/"}
              "clojars" {:url "https://clojars.org/repo"}}

  ;; libpython-clj: Python interop used by nlp-libpython-spacy.core.
  ;; clj-http, com.cemerick/url and data.csv: required by the sparql namespace.
  ;; NOTE(review): data.json is not required by any namespace visible in this
  ;; project -- confirm it is still needed.
  :dependencies [[org.clojure/clojure "1.11.1"]
                 [clj-python/libpython-clj "1.37"]
                 [clj-http "3.10.3"]
                 [com.cemerick/url "0.1.1"]
                 [org.clojure/data.csv "1.0.0"]
                 [org.clojure/data.json "1.0.0"]]
  ;; Entry point namespace (defines -main); not AOT-compiled outside the uberjar profile.
  :main ^:skip-aot nlp-libpython-spacy.core
  :target-path "target/%s"
  :profiles {:uberjar {:aot :all
                       :jvm-opts ["-Dclojure.compiler.direct-linking=true"]}})
(defn dbpedia
  "Send `sparql-query` to the DBpedia SPARQL endpoint with an HTTP GET,
  asking for CSV output, and return the response body parsed by
  clojure.data.csv/read-csv (a sequence of rows, each a vector of strings)."
  [sparql-query]
  (-> (str "https://dbpedia.org//sparql?output=csv&query=" (url-encode sparql-query))
      client/get
      :body
      csv/read-csv))
(defn text->tokens
  "Run the spaCy pipeline `nlp` over the string `text` and return a lazy
  sequence with the `text` attribute of each resulting token."
  [text]
  ;; py.- reads an attribute from a Python object; inside that form the symbol
  ;; `text` names the token attribute, not this function's argument.
  (for [token (nlp text)]
    (py.- token text)))
(defn spacy-qa-demo
  "Answer `natural-language-query` using DBpedia text as context.

  Steps:
  1. extract [entity-text spaCy-label] pairs from the query with text->entities;
  2. keep only entities whose label has an entry in `entity-map`;
  3. fetch the DBpedia text for each remaining entity exactly once and join
     the pieces with single spaces;
  4. pass the query and that context to `qa` and return its result."
  [natural-language-query]
  (let [entity-map
        ;; Maps a spaCy entity label to the DBpedia type used in the lookup.
        ;; NOTE(review): the values are empty strings here; presumably they
        ;; should be DBpedia class URIs -- confirm against the real file.
        {"PERSON" ""
         "ORG" ""
         "GPE" ""}
        ;; Entities with any other label (dates, money, ...) have no DBpedia
        ;; type to query with, so they are skipped rather than looked up with nil.
        known-entities
        (filter (fn [[_ label]] (contains? entity-map label))
                (text->entities natural-language-query))
        ;; One lookup per entity: the context contains each entity's text once.
        context-text
        (clojure.string/join
         " "
         (for [[entity-name label] known-entities]
           (kgn/dbpedia-get-entity-text-by-name
            entity-name
            (get entity-map label))))]
    (qa natural-language-query context-text)))
(test/deftest tokenization-test
  (test/testing "tokenization test"
    ;; The expected count must be a decimal literal: a leading zero makes the
    ;; Clojure reader parse the number as octal (033 reads as 27).
    ;; 19 assumes spaCy splits "$1" into "$" and "1" and emits the final "."
    ;; as its own token -- TODO confirm against the installed en_core_web_sm model.
    (test/is (= 19 (count (sp/text->tokens test-text))))))
21 | - Widget maker public API - `make-widget-sync`. 22 | 23 | [Unreleased]: https://github.com/your-name/opennlp-clj/compare/0.1.1...HEAD 24 | [0.1.1]: https://github.com/your-name/opennlp-clj/compare/0.1.0...0.1.1 25 | -------------------------------------------------------------------------------- /nlp_opennlp/README.md: -------------------------------------------------------------------------------- 1 | # opennlp-clj 2 | 3 | A Clojure library for using the Java OpenNLP library 4 | 5 | ## Code for my book "Practical Artificial Intelligence Programming With Clojure" 6 | 7 | You can read my eBooks for free; see my 8 | website [https://markwatson.com](https://markwatson.com). If you would like to pay me for a copy then please visit [https://leanpub.com/clojureai](https://leanpub.com/clojureai). 9 | 10 | ## Running the Example Code 11 | 12 | lein test 13 | 14 | ## Definitions for part of speech terms 15 | 16 | - CC Coordinating conjunction 17 | - CD Cardinal number 18 | - DT Determiner 19 | - EX Existential there 20 | - FW Foreign word 21 | - IN Preposition or subordinating conjunction 22 | - JJ Adjective 23 | - JJR Adjective, comparative 24 | - JJS Adjective, superlative 25 | - LS List item marker 26 | - MD Modal 27 | - NN Noun, singular or mass 28 | - NNS Noun, plural 29 | - NNP Proper noun, singular 30 | - NNPS Proper noun, plural 31 | - PDT Predeterminer 32 | - POS Possessive ending 33 | - PRP Personal pronoun 34 | - PRP$ Possessive pronoun 35 | - RB Adverb 36 | - RBR Adverb, comparative 37 | - RBS Adverb, superlative 38 | - RP Particle 39 | - SYM Symbol 40 | - TO to 41 | - UH Interjection 42 | - VB Verb, base form 43 | - VBD Verb, past tense 44 | - VBG Verb, gerund or present participle 45 | - VBN Verb, past participle 46 | - VBP Verb, non-3rd person singular present 47 | - VBZ Verb, 3rd person singular present 48 | - WDT Wh-determiner 49 | - WP Wh-pronoun 50 | - WP$ Possessive wh-pronoun 51 | - WRB Wh-adverb 52 | 53 | ## License 54 | 55 | Copyright © 2021 Mark
Watson 56 | 57 | This program and the accompanying materials are made available under the 58 | terms of the Eclipse Public License 2.0 which is available at 59 | http://www.eclipse.org/legal/epl-2.0. 60 | 61 | This Source Code may also be made available under the following Secondary 62 | Licenses when the conditions for such availability set forth in the Eclipse 63 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 64 | the Free Software Foundation, either version 2 of the License, or (at your 65 | option) any later version, with the GNU Classpath Exception which is available 66 | at https://www.gnu.org/software/classpath/license.html. 67 | -------------------------------------------------------------------------------- /nlp_opennlp/doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to opennlp-clj 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /nlp_opennlp/models/en-ner-location.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mark-watson/Clojure-AI-Book-Code/3ce517a4a9de72349b10a3b0ed7f98d9481888ad/nlp_opennlp/models/en-ner-location.bin -------------------------------------------------------------------------------- /nlp_opennlp/models/en-ner-organization.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mark-watson/Clojure-AI-Book-Code/3ce517a4a9de72349b10a3b0ed7f98d9481888ad/nlp_opennlp/models/en-ner-organization.bin -------------------------------------------------------------------------------- /nlp_opennlp/models/en-ner-person.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mark-watson/Clojure-AI-Book-Code/3ce517a4a9de72349b10a3b0ed7f98d9481888ad/nlp_opennlp/models/en-ner-person.bin -------------------------------------------------------------------------------- /nlp_opennlp/models/en-newscat.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mark-watson/Clojure-AI-Book-Code/3ce517a4a9de72349b10a3b0ed7f98d9481888ad/nlp_opennlp/models/en-newscat.bin -------------------------------------------------------------------------------- /nlp_opennlp/models/en-pos-maxent.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mark-watson/Clojure-AI-Book-Code/3ce517a4a9de72349b10a3b0ed7f98d9481888ad/nlp_opennlp/models/en-pos-maxent.bin -------------------------------------------------------------------------------- /nlp_opennlp/models/en-sent.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mark-watson/Clojure-AI-Book-Code/3ce517a4a9de72349b10a3b0ed7f98d9481888ad/nlp_opennlp/models/en-sent.bin -------------------------------------------------------------------------------- /nlp_opennlp/models/en-token.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mark-watson/Clojure-AI-Book-Code/3ce517a4a9de72349b10a3b0ed7f98d9481888ad/nlp_opennlp/models/en-token.bin -------------------------------------------------------------------------------- /nlp_opennlp/project.clj: -------------------------------------------------------------------------------- 1 | (defproject opennlp-clj "0.1.0-SNAPSHOT" 2 | :description "FIXME: write description" 3 | :url "http://example.com/FIXME" 4 | :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" 5 | :url "https://www.eclipse.org/legal/epl-2.0/"} 6 | :source-paths ["src"] 7 | :java-source-paths 
["src-java"] 8 | :javac-options ["-target" "1.8" "-source" "1.8"] 9 | 10 | :dependencies [[org.clojure/clojure "1.11.1"] 11 | ;[com.markwatson/opennlp "1.0-SNAPSHOT"] ;;from my Java AI book 12 | [opennlp/tools "1.5.0"] 13 | ] 14 | :repl-options {:init-ns opennlp-clj.core}) 15 | -------------------------------------------------------------------------------- /nlp_opennlp/src-java/main/java/com/markwatson/opennlp/NLP.java: -------------------------------------------------------------------------------- 1 | package com.markwatson.opennlp; 2 | 3 | import opennlp.tools.namefind.NameFinderME; 4 | import opennlp.tools.namefind.TokenNameFinderModel; 5 | import opennlp.tools.postag.POSModel; 6 | import opennlp.tools.postag.POSTaggerME; 7 | import opennlp.tools.sentdetect.SentenceDetectorME; 8 | import opennlp.tools.sentdetect.SentenceModel; 9 | import opennlp.tools.tokenize.Tokenizer; 10 | import opennlp.tools.tokenize.TokenizerME; 11 | import opennlp.tools.tokenize.TokenizerModel; 12 | import opennlp.tools.util.Span; 13 | 14 | import java.io.*; 15 | import java.util.*; 16 | 17 | public class NLP { 18 | 19 | private static String getModelPath() { 20 | String ret = "models/"; 21 | try { 22 | new FileInputStream("models/en-ner-organization.bin"); 23 | } catch (FileNotFoundException e) { 24 | System.err.println("Error: can not find OpenNLP model files, looking in ./models"); 25 | } 26 | return ret; 27 | } 28 | public static String[] tokenize(String s) { 29 | return tokenizer.tokenize(s); 30 | } 31 | public static String[] sentenceSplitter(String s) { 32 | return sentenceSplitter.sentDetect(s); 33 | } 34 | 35 | public static String[] POS(String s) { 36 | return tagger.tag(tokenize(s)); 37 | } 38 | 39 | public static String[] POS(String[] tokens) { 40 | return tagger.tag(tokens); 41 | } 42 | 43 | public static Set companyNames(String text) { 44 | return companyNames(tokenizer.tokenize(text)); 45 | } 46 | 47 | public static Set companyNames(String tokens[]) { 48 | Set ret = new 
HashSet(); 49 | Span[] nameSpans = organizationFinder.find(tokens); 50 | if (nameSpans.length == 0) return ret; 51 | for (int i = 0; i < nameSpans.length; i++) { 52 | Span span = nameSpans[i]; 53 | StringBuilder sb = new StringBuilder(); 54 | for (int j = span.getStart(); j < span.getEnd(); j++) sb.append(tokens[j] + " "); 55 | ret.add(sb.toString().trim().replaceAll(" ,", ",")); 56 | } 57 | return ret; 58 | } 59 | 60 | public static Set locationNames(String text) { 61 | return locationNames(tokenizer.tokenize(text)); 62 | } 63 | 64 | public static Set locationNames(String tokens[]) { 65 | Set ret = new HashSet(); 66 | Span[] nameSpans = locationFinder.find(tokens); 67 | if (nameSpans.length == 0) return ret; 68 | for (int i = 0; i < nameSpans.length; i++) { 69 | Span span = nameSpans[i]; 70 | StringBuilder sb = new StringBuilder(); 71 | for (int j = span.getStart(); j < span.getEnd(); j++) 72 | sb.append(tokens[j] + " "); 73 | ret.add(sb.toString().trim().replaceAll(" ,", ",")); 74 | } 75 | return ret; 76 | } 77 | 78 | public static Set personNames(String text) { 79 | return personNames(tokenizer.tokenize(text)); 80 | } 81 | 82 | public static Set personNames(String tokens[]) { 83 | Set ret = new HashSet(); 84 | Span[] nameSpans = personNameFinder.find(tokens); 85 | if (nameSpans.length == 0) return ret; 86 | for (int i = 0; i < nameSpans.length; i++) { 87 | Span span = nameSpans[i]; 88 | StringBuilder sb = new StringBuilder(); 89 | for (int j = span.getStart(); j < span.getEnd(); j++) sb.append(tokens[j] + " "); 90 | ret.add(sb.toString().trim().replaceAll(" ,", ",")); 91 | } 92 | return ret; 93 | } 94 | 95 | static public Tokenizer tokenizer = null; 96 | static public SentenceDetectorME sentenceSplitter = null; 97 | static POSTaggerME tagger = null; 98 | static NameFinderME organizationFinder = null; 99 | static NameFinderME locationFinder = null; 100 | static NameFinderME personNameFinder = null; 101 | static String modelPath = null; 102 | 103 | static { 104 | 
105 | try { 106 | modelPath = getModelPath(); 107 | InputStream organizationInputStream = new FileInputStream(modelPath + "en-ner-organization.bin"); 108 | TokenNameFinderModel model = new TokenNameFinderModel(organizationInputStream); 109 | organizationFinder = new NameFinderME(model); 110 | organizationInputStream.close(); 111 | 112 | InputStream locationInputStream = new FileInputStream(modelPath + "en-ner-location.bin"); 113 | model = new TokenNameFinderModel(locationInputStream); 114 | locationFinder = new NameFinderME(model); 115 | locationInputStream.close(); 116 | 117 | InputStream personNameInputStream = new FileInputStream(modelPath + "en-ner-person.bin"); 118 | model = new TokenNameFinderModel(personNameInputStream); 119 | personNameFinder = new NameFinderME(model); 120 | personNameInputStream.close(); 121 | 122 | InputStream tokienizerInputStream = new FileInputStream(modelPath + "en-token.bin"); 123 | TokenizerModel modelTokenizer = new TokenizerModel(tokienizerInputStream); 124 | tokenizer = new TokenizerME(modelTokenizer); 125 | tokienizerInputStream.close(); 126 | 127 | InputStream sentenceInputStream = new FileInputStream(modelPath + "en-sent.bin"); 128 | SentenceModel sentenceTokenizer = new SentenceModel(sentenceInputStream); 129 | sentenceSplitter = new SentenceDetectorME(sentenceTokenizer); 130 | tokienizerInputStream.close(); 131 | 132 | organizationInputStream = new FileInputStream(modelPath + "en-pos-maxent.bin"); 133 | POSModel posModel = new POSModel(organizationInputStream); 134 | tagger = new POSTaggerME(posModel); 135 | 136 | } catch (IOException e) { 137 | e.printStackTrace(); 138 | } 139 | } 140 | 141 | } 142 | -------------------------------------------------------------------------------- /nlp_opennlp/src-java/main/java/com/markwatson/opennlp/Pair.java: -------------------------------------------------------------------------------- 1 | package com.markwatson.opennlp; 2 | 3 | public class Pair { 4 | public Pair(X x, Y y) { 5 | 
(defn sentence-splitter
  "Split `string-input` into sentences via NLP/sentenceSplitter and return
  them as a Clojure seq (nil when the resulting array is empty)."
  [string-input]
  (-> string-input
      NLP/sentenceSplitter
      seq))
watched.") 8 | 9 | (test/deftest pos-test 10 | (test/testing "parts of speech" 11 | (let [token-java-array (onlp/tokenize->java test-text) 12 | token-clojure-seq (onlp/tokenize->seq test-text) 13 | words-pos (onlp/POS token-java-array) 14 | companies (onlp/company-names token-java-array) 15 | places (onlp/location-names token-java-array) 16 | people (onlp/person-names token-java-array)] 17 | (println "Input text:\n" test-text) 18 | (println "Tokens as Java array:\n" token-java-array) 19 | (println "Tokens as Clojure seq:\n" token-clojure-seq) 20 | (println "Part of speech tokens:\n" words-pos) 21 | (println "Companies:\n" companies) 22 | (println "Places:\n" places) 23 | (println "People:\n" people) 24 | (test/is (= (first words-pos) "DT"))))) 25 | -------------------------------------------------------------------------------- /old_code/openai_api_old/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | .idea 5 | .lsp 6 | profiles.clj 7 | pom.xml 8 | pom.xml.asc 9 | *.jar 10 | *.class 11 | /.lein-* 12 | /.nrepl-port 13 | /.prepl-port 14 | .hgignore 15 | .hg/ 16 | -------------------------------------------------------------------------------- /old_code/openai_api_old/README.md: -------------------------------------------------------------------------------- 1 | # openai_api 2 | 3 | A Clojure library for using the OpenAI APIs (GPT-3 based) 4 | 5 | ## Usage 6 | 7 | lein test 8 | 9 | ## Code for my book "Practical Artificial Intelligence Programming With Clojure" 10 | 11 | You read my eBooks for free, see my 12 | website [https://markwatson.com](https://markwatson.com). If you would like to pay me for a copy then please visit [https://leanpub.com/clojureai](https://leanpub.com/clojureai). 
13 | 14 | ## License 15 | 16 | Copyright © 2021 Mark Watson 17 | 18 | This program and the accompanying materials are made available under the 19 | terms of the Eclipse Public License 2.0 which is available at 20 | http://www.eclipse.org/legal/epl-2.0. 21 | 22 | This Source Code may also be made available under the following Secondary 23 | Licenses when the conditions for such availability set forth in the Eclipse 24 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 25 | the Free Software Foundation, either version 2 of the License, or (at your 26 | option) any later version, with the GNU Classpath Exception which is available 27 | at https://www.gnu.org/software/classpath/license.html. 28 | -------------------------------------------------------------------------------- /old_code/openai_api_old/doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to openai_api 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /old_code/openai_api_old/project.clj: -------------------------------------------------------------------------------- 1 | (defproject openai_api "0.1.0-SNAPSHOT" 2 | :description "FIXME: write description" 3 | :url "http://example.com/FIXME" 4 | :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" 5 | :url "https://www.eclipse.org/legal/epl-2.0/"} 6 | :dependencies [[org.clojure/clojure "1.10.1"] 7 | [clj-http "3.12.3"] 8 | [org.clojure/data.json "2.3.1"] 9 | ] 10 | :repl-options {:init-ns openai-api.core}) 11 | -------------------------------------------------------------------------------- /old_code/openai_api_old/src/openai_api/core.clj: -------------------------------------------------------------------------------- 1 | (ns openai-api.core 2 | (:require [clj-http.client :as client]) 3 | (:require [clojure.data.json :as 
;; define the environment variable "OPENAI_KEY" with the value of your OpenAI API key

(defn- openai-helper
  "POST `body` (a JSON string) to the OpenAI completions endpoint, using the
  OPENAI_KEY environment variable as bearer token, and return the \"text\" of
  the first entry of \"choices\" in the JSON response (nil when absent)."
  [body]
  ;; NOTE(review): the engines/davinci endpoint belongs to the older OpenAI
  ;; API surface -- confirm it is still served before relying on this code.
  (let [response
        (client/post
         "https://api.openai.com/v1/engines/davinci/completions"
         {:accept :json
          :headers
          {"Content-Type" "application/json"
           "Authorization" (str "Bearer " (System/getenv "OPENAI_KEY"))}
          :body body})]
    (get-in (json/read-str (:body response)) ["choices" 0 "text"])))

(defn- request-json
  "Serialize a completions request to a JSON string. `extra-params` is a map
  of additional top-level request fields. Using json/write-str (instead of
  string concatenation) escapes quotes, backslashes and newlines in the prompt."
  [prompt max-tokens extra-params]
  (json/write-str
   (merge {"prompt" prompt
           ;; accept a numeric string too, as the concatenating version did
           "max_tokens" (if (string? max-tokens)
                          (Long/parseLong max-tokens)
                          max-tokens)}
          extra-params)))

(defn completions
  "Use the OpenAI API for text completions.
  `prompt-text` is the text to continue, `max-tokens` the completion length
  limit. Returns the completion text."
  [prompt-text max-tokens]
  (openai-helper (request-json prompt-text max-tokens {})))

(defn summarize
  "Use the OpenAI API for text summarization.
  Sends `prompt-text` with temperature 0.3, top_p 1.0 and zero presence and
  frequency penalties. Returns the generated text."
  [prompt-text max-tokens]
  (openai-helper
   (request-json prompt-text
                 max-tokens
                 {"presence_penalty" 0.0
                  "temperature" 0.3
                  "top_p" 1.0
                  "frequency_penalty" 0.0})))


(defn answer-question
  "Use the OpenAI API for question answering.
  Wraps `prompt-text` in a Q/A prompt, stops generation at the first newline,
  and returns the answer text cut off before any follow-up \"nQ:\" marker."
  [prompt-text max-tokens]
  (let [;; NOTE(review): "nQ: " / "nA:" look like "\nQ: " / "\nA:" with the
        ;; backslash dropped; kept unchanged -- confirm the intended prompt.
        body (request-json (str "nQ: " prompt-text "nA:")
                           max-tokens
                           {"presence_penalty" 0.0
                            "temperature" 0.3
                            "top_p" 1.0
                            "frequency_penalty" 0.0
                            "stop" ["\n"]})
        results (openai-helper body)
        ;; String.indexOf avoids depending on clojure.string, which this
        ;; namespace does not require; it returns -1 when the marker is absent.
        ind (if results (.indexOf ^String results "nQ:") -1)]
    (if (neg? ind)
      results
      (subs results 0 ind))))
13 | 240)] 14 | (println results) 15 | (is (= 0 0))))) 16 | 17 | '(deftest completions-test 18 | (testing "OpenAI completions API" 19 | (let [results 20 | (openai-api.core/completions "He walked to the river" 80)] 21 | (println results) 22 | (is (= 0 0))))) 23 | 24 | '(deftest summarize-test 25 | (testing "OpenAI summarize API" 26 | (let [results 27 | (openai-api.core/summarize 28 | some-text 29 | 24)] 30 | (println results) 31 | (is (= 0 0))))) 32 | 33 | 34 | (deftest question-answering-test 35 | (testing "OpenAI question-answering API" 36 | (let [results 37 | (openai-api.core/answer-question 38 | ;;"If it is not used for hair, a round brush is an example of what 1. hair brush 2. bathroom 3. art supplies 4. shower ?" 39 | "Where is the Valley of Kings?" 40 | ;"Where is San Francisco?" 41 | 60)] 42 | (println results) 43 | (is (= 0 0))))) 44 | -------------------------------------------------------------------------------- /ollama/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | .idea 5 | .lsp 6 | profiles.clj 7 | pom.xml 8 | pom.xml.asc 9 | *.jar 10 | *.class 11 | /.lein-* 12 | /.nrepl-port 13 | /.prepl-port 14 | .hgignore 15 | .hg/ 16 | -------------------------------------------------------------------------------- /ollama/LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 2.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE 4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION 5 | OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. 
DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial content 12 | Distributed under this Agreement, and 13 | 14 | b) in the case of each subsequent Contributor: 15 | i) changes to the Program, and 16 | ii) additions to the Program; 17 | where such changes and/or additions to the Program originate from 18 | and are Distributed by that particular Contributor. A Contribution 19 | "originates" from a Contributor if it was added to the Program by 20 | such Contributor itself or anyone acting on such Contributor's behalf. 21 | Contributions do not include changes or additions to the Program that 22 | are not Modified Works. 23 | 24 | "Contributor" means any person or entity that Distributes the Program. 25 | 26 | "Licensed Patents" mean patent claims licensable by a Contributor which 27 | are necessarily infringed by the use or sale of its Contribution alone 28 | or when combined with the Program. 29 | 30 | "Program" means the Contributions Distributed in accordance with this 31 | Agreement. 32 | 33 | "Recipient" means anyone who receives the Program under this Agreement 34 | or any Secondary License (as applicable), including Contributors. 35 | 36 | "Derivative Works" shall mean any work, whether in Source Code or other 37 | form, that is based on (or derived from) the Program and for which the 38 | editorial revisions, annotations, elaborations, or other modifications 39 | represent, as a whole, an original work of authorship. 40 | 41 | "Modified Works" shall mean any work in Source Code or other form that 42 | results from an addition to, deletion from, or modification of the 43 | contents of the Program, including, for purposes of clarity any new file 44 | in Source Code form that contains any contents of the Program. 
Modified 45 | Works shall not include works that contain only declarations, 46 | interfaces, types, classes, structures, or files of the Program solely 47 | in each case in order to link to, bind by name, or subclass the Program 48 | or Modified Works thereof. 49 | 50 | "Distribute" means the acts of a) distributing or b) making available 51 | in any manner that enables the transfer of a copy. 52 | 53 | "Source Code" means the form of a Program preferred for making 54 | modifications, including but not limited to software source code, 55 | documentation source, and configuration files. 56 | 57 | "Secondary License" means either the GNU General Public License, 58 | Version 2.0, or any later versions of that license, including any 59 | exceptions or additional permissions as identified by the initial 60 | Contributor. 61 | 62 | 2. GRANT OF RIGHTS 63 | 64 | a) Subject to the terms of this Agreement, each Contributor hereby 65 | grants Recipient a non-exclusive, worldwide, royalty-free copyright 66 | license to reproduce, prepare Derivative Works of, publicly display, 67 | publicly perform, Distribute and sublicense the Contribution of such 68 | Contributor, if any, and such Derivative Works. 69 | 70 | b) Subject to the terms of this Agreement, each Contributor hereby 71 | grants Recipient a non-exclusive, worldwide, royalty-free patent 72 | license under Licensed Patents to make, use, sell, offer to sell, 73 | import and otherwise transfer the Contribution of such Contributor, 74 | if any, in Source Code or other form. This patent license shall 75 | apply to the combination of the Contribution and the Program if, at 76 | the time the Contribution is added by the Contributor, such addition 77 | of the Contribution causes such combination to be covered by the 78 | Licensed Patents. The patent license shall not apply to any other 79 | combinations which include the Contribution. No hardware per se is 80 | licensed hereunder. 
81 | 82 | c) Recipient understands that although each Contributor grants the 83 | licenses to its Contributions set forth herein, no assurances are 84 | provided by any Contributor that the Program does not infringe the 85 | patent or other intellectual property rights of any other entity. 86 | Each Contributor disclaims any liability to Recipient for claims 87 | brought by any other entity based on infringement of intellectual 88 | property rights or otherwise. As a condition to exercising the 89 | rights and licenses granted hereunder, each Recipient hereby 90 | assumes sole responsibility to secure any other intellectual 91 | property rights needed, if any. For example, if a third party 92 | patent license is required to allow Recipient to Distribute the 93 | Program, it is Recipient's responsibility to acquire that license 94 | before distributing the Program. 95 | 96 | d) Each Contributor represents that to its knowledge it has 97 | sufficient copyright rights in its Contribution, if any, to grant 98 | the copyright license set forth in this Agreement. 99 | 100 | e) Notwithstanding the terms of any Secondary License, no 101 | Contributor makes additional grants to any Recipient (other than 102 | those set forth in this Agreement) as a result of such Recipient's 103 | receipt of the Program under the terms of a Secondary License 104 | (if permitted under the terms of Section 3). 105 | 106 | 3. 
REQUIREMENTS 107 | 108 | 3.1 If a Contributor Distributes the Program in any form, then: 109 | 110 | a) the Program must also be made available as Source Code, in 111 | accordance with section 3.2, and the Contributor must accompany 112 | the Program with a statement that the Source Code for the Program 113 | is available under this Agreement, and informs Recipients how to 114 | obtain it in a reasonable manner on or through a medium customarily 115 | used for software exchange; and 116 | 117 | b) the Contributor may Distribute the Program under a license 118 | different than this Agreement, provided that such license: 119 | i) effectively disclaims on behalf of all other Contributors all 120 | warranties and conditions, express and implied, including 121 | warranties or conditions of title and non-infringement, and 122 | implied warranties or conditions of merchantability and fitness 123 | for a particular purpose; 124 | 125 | ii) effectively excludes on behalf of all other Contributors all 126 | liability for damages, including direct, indirect, special, 127 | incidental and consequential damages, such as lost profits; 128 | 129 | iii) does not attempt to limit or alter the recipients' rights 130 | in the Source Code under section 3.2; and 131 | 132 | iv) requires any subsequent distribution of the Program by any 133 | party to be under a license that satisfies the requirements 134 | of this section 3. 
135 | 136 | 3.2 When the Program is Distributed as Source Code: 137 | 138 | a) it must be made available under this Agreement, or if the 139 | Program (i) is combined with other material in a separate file or 140 | files made available under a Secondary License, and (ii) the initial 141 | Contributor attached to the Source Code the notice described in 142 | Exhibit A of this Agreement, then the Program may be made available 143 | under the terms of such Secondary Licenses, and 144 | 145 | b) a copy of this Agreement must be included with each copy of 146 | the Program. 147 | 148 | 3.3 Contributors may not remove or alter any copyright, patent, 149 | trademark, attribution notices, disclaimers of warranty, or limitations 150 | of liability ("notices") contained within the Program from any copy of 151 | the Program which they Distribute, provided that Contributors may add 152 | their own appropriate notices. 153 | 154 | 4. COMMERCIAL DISTRIBUTION 155 | 156 | Commercial distributors of software may accept certain responsibilities 157 | with respect to end users, business partners and the like. While this 158 | license is intended to facilitate the commercial use of the Program, 159 | the Contributor who includes the Program in a commercial product 160 | offering should do so in a manner which does not create potential 161 | liability for other Contributors. Therefore, if a Contributor includes 162 | the Program in a commercial product offering, such Contributor 163 | ("Commercial Contributor") hereby agrees to defend and indemnify every 164 | other Contributor ("Indemnified Contributor") against any losses, 165 | damages and costs (collectively "Losses") arising from claims, lawsuits 166 | and other legal actions brought by a third party against the Indemnified 167 | Contributor to the extent caused by the acts or omissions of such 168 | Commercial Contributor in connection with its distribution of the Program 169 | in a commercial product offering. 
The obligations in this section do not 170 | apply to any claims or Losses relating to any actual or alleged 171 | intellectual property infringement. In order to qualify, an Indemnified 172 | Contributor must: a) promptly notify the Commercial Contributor in 173 | writing of such claim, and b) allow the Commercial Contributor to control, 174 | and cooperate with the Commercial Contributor in, the defense and any 175 | related settlement negotiations. The Indemnified Contributor may 176 | participate in any such claim at its own expense. 177 | 178 | For example, a Contributor might include the Program in a commercial 179 | product offering, Product X. That Contributor is then a Commercial 180 | Contributor. If that Commercial Contributor then makes performance 181 | claims, or offers warranties related to Product X, those performance 182 | claims and warranties are such Commercial Contributor's responsibility 183 | alone. Under this section, the Commercial Contributor would have to 184 | defend claims against the other Contributors related to those performance 185 | claims and warranties, and if a court requires any other Contributor to 186 | pay any damages as a result, the Commercial Contributor must pay 187 | those damages. 188 | 189 | 5. NO WARRANTY 190 | 191 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 192 | PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" 193 | BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 194 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF 195 | TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR 196 | PURPOSE. 
Each Recipient is solely responsible for determining the 197 | appropriateness of using and distributing the Program and assumes all 198 | risks associated with its exercise of rights under this Agreement, 199 | including but not limited to the risks and costs of program errors, 200 | compliance with applicable laws, damage to or loss of data, programs 201 | or equipment, and unavailability or interruption of operations. 202 | 203 | 6. DISCLAIMER OF LIABILITY 204 | 205 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 206 | PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS 207 | SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 208 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST 209 | PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 210 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 211 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 212 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE 213 | POSSIBILITY OF SUCH DAMAGES. 214 | 215 | 7. GENERAL 216 | 217 | If any provision of this Agreement is invalid or unenforceable under 218 | applicable law, it shall not affect the validity or enforceability of 219 | the remainder of the terms of this Agreement, and without further 220 | action by the parties hereto, such provision shall be reformed to the 221 | minimum extent necessary to make such provision valid and enforceable. 222 | 223 | If Recipient institutes patent litigation against any entity 224 | (including a cross-claim or counterclaim in a lawsuit) alleging that the 225 | Program itself (excluding combinations of the Program with other software 226 | or hardware) infringes such Recipient's patent(s), then such Recipient's 227 | rights granted under Section 2(b) shall terminate as of the date such 228 | litigation is filed. 
229 | 230 | All Recipient's rights under this Agreement shall terminate if it 231 | fails to comply with any of the material terms or conditions of this 232 | Agreement and does not cure such failure in a reasonable period of 233 | time after becoming aware of such noncompliance. If all Recipient's 234 | rights under this Agreement terminate, Recipient agrees to cease use 235 | and distribution of the Program as soon as reasonably practicable. 236 | However, Recipient's obligations under this Agreement and any licenses 237 | granted by Recipient relating to the Program shall continue and survive. 238 | 239 | Everyone is permitted to copy and distribute copies of this Agreement, 240 | but in order to avoid inconsistency the Agreement is copyrighted and 241 | may only be modified in the following manner. The Agreement Steward 242 | reserves the right to publish new versions (including revisions) of 243 | this Agreement from time to time. No one other than the Agreement 244 | Steward has the right to modify this Agreement. The Eclipse Foundation 245 | is the initial Agreement Steward. The Eclipse Foundation may assign the 246 | responsibility to serve as the Agreement Steward to a suitable separate 247 | entity. Each new version of the Agreement will be given a distinguishing 248 | version number. The Program (including Contributions) may always be 249 | Distributed subject to the version of the Agreement under which it was 250 | received. In addition, after a new version of the Agreement is published, 251 | Contributor may elect to Distribute the Program (including its 252 | Contributions) under the new version. 253 | 254 | Except as expressly stated in Sections 2(a) and 2(b) above, Recipient 255 | receives no rights or licenses to the intellectual property of any 256 | Contributor under this Agreement, whether expressly, by implication, 257 | estoppel or otherwise. All rights in the Program not expressly granted 258 | under this Agreement are reserved. 
Nothing in this Agreement is intended 259 | to be enforceable by any entity that is not a Contributor or Recipient. 260 | No third-party beneficiary rights are created under this Agreement. 261 | 262 | Exhibit A - Form of Secondary Licenses Notice 263 | 264 | "This Source Code may also be made available under the following 265 | Secondary Licenses when the conditions for such availability set forth 266 | in the Eclipse Public License, v. 2.0 are satisfied: GNU General Public 267 | License as published by the Free Software Foundation, either version 2 268 | of the License, or (at your option) any later version, with the GNU 269 | Classpath Exception which is available at 270 | https://www.gnu.org/software/classpath/license.html." 271 | 272 | Simply including a copy of this Agreement, including this Exhibit A 273 | is not sufficient to license the Source Code under Secondary Licenses. 274 | 275 | If it is not possible or desirable to put the notice in a particular 276 | file, then You may include the notice in a location (such as a LICENSE 277 | file in a relevant directory) where a recipient would be likely to 278 | look for such a notice. 279 | 280 | You may add additional accurate notices of copyright ownership. 
281 | -------------------------------------------------------------------------------- /ollama/README.md: -------------------------------------------------------------------------------- 1 | # Using the Ollama Large Language Model APIs in Clojure 2 | 3 | A Clojure library for using the Ollama LLM APIs 4 | 5 | You need to install Ollama on your system: https://ollama.ai 6 | 7 | You then need to install the Mistral Small model that this library requests (this takes a while the first time, but the model file is cached so future startups are fast): 8 | 9 | ollama run mistral-small 10 | 11 | 12 | ## Usage 13 | 14 | In one console, run the Ollama REST API service: 15 | 16 | ollama serve 17 | 18 | Then run the tests in another window: 19 | 20 | lein test 21 | 22 | ## Code for my book "Practical Artificial Intelligence Programming With Clojure" 23 | 24 | You can read my eBooks for free; see my 25 | website [https://markwatson.com](https://markwatson.com). If you would like to pay me for a copy then please visit [https://leanpub.com/clojureai](https://leanpub.com/clojureai). 26 | 27 | ## License 28 | 29 | Copyright © 2021 Mark Watson 30 | 31 | This program and the accompanying materials are made available under the 32 | terms of the Eclipse Public License 2.0 which is available at 33 | http://www.eclipse.org/legal/epl-2.0. 34 | 35 | This Source Code may also be made available under the following Secondary 36 | Licenses when the conditions for such availability set forth in the Eclipse 37 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 38 | the Free Software Foundation, either version 2 of the License, or (at your 39 | option) any later version, with the GNU Classpath Exception which is available 40 | at https://www.gnu.org/software/classpath/license.html. 
41 |
--------------------------------------------------------------------------------
/ollama/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to ollama_api
2 |
3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
4 |
--------------------------------------------------------------------------------
/ollama/project.clj:
--------------------------------------------------------------------------------
1 | (defproject ollama_api "0.1.0-SNAPSHOT"
2 |   :description "A Clojure library for using the Ollama LLM APIs"
3 |   :url "http://example.com/FIXME"
4 |   :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
5 |             :url "https://www.eclipse.org/legal/epl-2.0/"}
6 |   :dependencies [[org.clojure/clojure "1.10.1"]
7 |                  [clj-http "3.12.3"]
8 |                  [org.clojure/data.json "2.3.1"]
9 |                  ]
10 |   :repl-options {:init-ns ollama-api.core})
11 |
--------------------------------------------------------------------------------
/ollama/src/ollama_api/core.clj:
--------------------------------------------------------------------------------
1 | (ns ollama-api.core
2 |   "Thin client for the text-generation endpoint of a locally running Ollama server."
3 |   (:require [clj-http.client :as client]
4 |             [clojure.data.json :as json]))
5 |
6 | (def default-model
7 |   "Name of the Ollama model requested when a caller does not supply one."
8 |   "mistral-small")
9 |
10 | (defn- ollama-helper
11 |   "POSTs the JSON string `body` to the local Ollama generate endpoint and
12 |   returns the value stored under the \"response\" key of the JSON reply."
13 |   [body]
14 |   (let [reply (client/post
15 |                 "http://localhost:11434/api/generate"
16 |                 {:accept  :json
17 |                  :headers {"Content-Type" "application/json"}
18 |                  :body    body})]
19 |     ;; `get` (rather than calling the decoded value as a function) yields nil
20 |     ;; instead of throwing when the reply body does not decode to a map.
21 |     (get (json/read-str (:body reply)) "response")))
22 |
23 | (defn completions
24 |   "Use the Ollama API for text completions.
25 |   `model` is optional and defaults to `default-model`. The request sets
26 |   `:stream false`, and the \"response\" field of the single reply is returned."
27 |   ([prompt-text]
28 |    (completions prompt-text default-model))
29 |   ([prompt-text model]
30 |    (ollama-helper
31 |      (json/write-str
32 |        {:prompt prompt-text
33 |         :model  model
34 |         :stream false}))))
35 |
36 | (defn summarize
37 |   "Use the Ollama API for text summarization; `model` is optional."
38 |   ([prompt-text]
39 |    (summarize prompt-text default-model))
40 |   ([prompt-text model]
41 |    (completions (str "Summarize the following text: " prompt-text) model)))
42 |
43 | (defn answer-question
44 |   "Use the Ollama API for question answering; `model` is optional."
45 |   ([prompt-text]
46 |    (answer-question prompt-text default-model))
47 |   ([prompt-text model]
48 |    (completions (str "Answer the following question: " prompt-text) model)))
49 |
--------------------------------------------------------------------------------
/ollama/test/ollama_api/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns ollama-api.core-test
2 |   (:require [clojure.test :refer :all]
3 |             [ollama-api.core :refer :all]))
4 |
5 | (def some-text
6 |   "Jupiter is the fifth planet from the Sun and the largest in the Solar System. It is a gas giant with a mass one-thousandth that of the Sun, but two-and-a-half times that of all the other planets in the Solar System combined. Jupiter is one of the brightest objects visible to the naked eye in the night sky, and has been known to ancient civilizations since before recorded history. It is named after the Roman god Jupiter.[19] When viewed from Earth, Jupiter can be bright enough for its reflected light to cast visible shadows,[20] and is on average the third-brightest natural object in the night sky after the Moon and Venus.")
7 |
8 | (deftest completions-test
9 |   (testing "ollama-Ollama completions API"
10 |     (let [results
11 |           (ollama-api.core/completions "He walked to the river")]
12 |       (println results))))
13 |
14 | (deftest summarize-test
15 |   (testing "ollama-Ollama summarize API"
16 |     (let [results
17 |           (ollama-api.core/summarize
18 |             some-text)]
19 |       (println results))))
20 |
21 |
22 | (deftest question-answering-test
23 |   (testing "ollama-Ollama question-answering API"
24 |     (let [results
25 |           (ollama-api.core/answer-question
26 |             "Where is the Valley of Kings?"
27 | )] 28 | (println results)))) 29 | -------------------------------------------------------------------------------- /openai_api/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | .idea 5 | .lsp 6 | profiles.clj 7 | pom.xml 8 | pom.xml.asc 9 | *.jar 10 | *.class 11 | /.lein-* 12 | /.nrepl-port 13 | /.prepl-port 14 | .hgignore 15 | .hg/ 16 | 17 | .codegpt -------------------------------------------------------------------------------- /openai_api/README.md: -------------------------------------------------------------------------------- 1 | # openai_api 2 | 3 | A Clojure library for using the OpenAI APIs (GPT-4 based) 4 | 5 | Note: May 2025: I just modified the OpenAI client code using Gemini and OpenAI. 6 | 7 | 8 | ## Usage 9 | 10 | lein test 11 | 12 | ## Code for my book "Practical Artificial Intelligence Programming With Clojure" 13 | 14 | You can read the book for this example code for free online: [https://leanpub.com/clojureai/read](https://leanpub.com/clojureai/read). 15 | 16 | If you would like to pay me for a copy of this book then please visit [https://leanpub.com/clojureai](https://leanpub.com/clojureai). 17 | 18 | Please also visit my website [https://markwatson.com](https://markwatson.com). 19 | 20 | ## License 21 | 22 | Copyright © 2021-2024 Mark Watson 23 | 24 | This program and the accompanying materials are made available under the 25 | terms of the Eclipse Public License 2.0 which is available at 26 | http://www.eclipse.org/legal/epl-2.0. 27 | 28 | This Source Code may also be made available under the following Secondary 29 | Licenses when the conditions for such availability set forth in the Eclipse 30 | Public License, v. 
2.0 are satisfied: GNU General Public License as published by
31 | the Free Software Foundation, either version 2 of the License, or (at your
32 | option) any later version, with the GNU Classpath Exception which is available
33 | at https://www.gnu.org/software/classpath/license.html.
34 |
--------------------------------------------------------------------------------
/openai_api/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to openai_api
2 |
3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
4 |
--------------------------------------------------------------------------------
/openai_api/project.clj:
--------------------------------------------------------------------------------
1 | (defproject openai_api "0.1.0-SNAPSHOT"
2 |   :description "A Clojure library for using the OpenAI APIs"
3 |   :url "http://example.com/FIXME"
4 |   :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
5 |             :url "https://www.eclipse.org/legal/epl-2.0/"}
6 |   :dependencies [[org.clojure/clojure "1.10.1"]
7 |                  [clj-http "3.12.3"]
8 |                  [org.clojure/data.json "2.3.1"]
9 |                  ]
10 |   :repl-options {:init-ns openai-api.core})
11 |
--------------------------------------------------------------------------------
/openai_api/src/openai_api/core.clj:
--------------------------------------------------------------------------------
1 | (ns openai-api.core
2 |   "Minimal client for the OpenAI chat-completion and embedding REST endpoints."
3 |   (:require [clj-http.client :as client]
4 |             [clojure.data.json :as json]))
5 |
6 | ;; Chat model requested when a caller does not name one.
7 | (def model2 "gpt-4o-mini")
8 |
9 | ;; Read once, when this namespace is loaded, from the OPENAI_API_KEY
10 | ;; environment variable.
11 | (def api-key (System/getenv "OPENAI_API_KEY"))
12 |
13 | (defn completions
14 |   "Sends `prompt` as a single user message to the chat-completions endpoint
15 |   and returns the `:content` of the message in the first choice of the reply.
16 |   `model` is optional and defaults to `model2`."
17 |   ([prompt]
18 |    (completions prompt model2))
19 |   ([prompt model]
20 |    (let [url      "https://api.openai.com/v1/chat/completions"
21 |          headers  {"Authorization" (str "Bearer " api-key)
22 |                    "Content-Type"  "application/json"}
23 |          body     {:model    model
24 |                    :messages [{:role "user" :content prompt}]}
25 |          response (client/post url {:headers headers
26 |                                     :body    (json/write-str body)})]
27 |      ;; Decode with keyword keys, then walk choices -> first -> message -> content.
28 |      (-> (:body response)
29 |          (json/read-str :key-fn keyword)
30 |          :choices
31 |          first
32 |          :message
33 |          :content))))
34 |
35 | (defn summarize
36 |   "Use the OpenAI API for text summarization; `model` is optional."
37 |   ([text]
38 |    (summarize text model2))
39 |   ([text model]
40 |    (completions (str "Summarize the following text:\n\n" text) model)))
41 |
42 | (defn answer-question
43 |   "Use the OpenAI API for question answering; `model` is optional."
44 |   ([text]
45 |    (answer-question text model2))
46 |   ([text model]
47 |    (completions (str "Answer the following question:\n\n" text) model)))
48 |
49 |
50 | (defn embeddings
51 |   "Requests a \"text-embedding-ada-002\" embedding for `text` and returns the
52 |   \"embedding\" value of the first entry in the reply's \"data\" list.
53 |   The request body is produced by the JSON encoder, so quotes, colons,
54 |   newlines and backslashes in `text` are escaped and sent as written instead
55 |   of being stripped by hand-built string concatenation.
56 |   If an exception is thrown its message is printed and \"\" is returned; a
57 |   reply without an embedding also yields \"\" (kept for existing callers)."
58 |   [text]
59 |   (try
60 |     (let [body  (json/write-str {:input text
61 |                                  :model "text-embedding-ada-002"})
62 |           reply (client/post
63 |                   "https://api.openai.com/v1/embeddings"
64 |                   {:accept  :json
65 |                    :headers {"Content-Type"  "application/json"
66 |                              "Authorization" (str "Bearer " api-key)}
67 |                    :body    body})]
68 |       (or (-> (json/read-str (:body reply))
69 |               (get "data")
70 |               first
71 |               (get "embedding"))
72 |           ""))
73 |     (catch Exception e
74 |       (println "Error:" (.getMessage e))
75 |       "")))
76 |
77 | (defn dot-product
78 |   "Returns the sum of the pairwise products of the numbers in `a` and `b`;
79 |   pairing stops at the end of the shorter sequence."
80 |   [a b]
81 |   (reduce + (map * a b)))
82 |
83 |
--------------------------------------------------------------------------------
/openai_api/test/openai_api/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns openai-api.core-test
2 |   (:require [clojure.test :refer :all]
3 |             [openai-api.core :refer :all]))
4 |
5 | (def some-text
6 |   "Jupiter is the fifth planet from the Sun and the largest in the Solar System. It is a gas giant with a mass one-thousandth that of the Sun, but two-and-a-half times that of all the other planets in the Solar System combined. Jupiter is one of the brightest objects visible to the naked eye in the night sky, and has been known to ancient civilizations since before recorded history.
It is named after the Roman god Jupiter.[19] When viewed from Earth, Jupiter can be bright enough for its reflected light to cast visible shadows,[20] and is on average the third-brightest natural object in the night sky after the Moon and Venus.")
7 |
8 | (deftest completions-test
9 |   (testing "OpenAI completions API"
10 |     (let [results
11 |           (openai-api.core/completions "He walked to the river")]
12 |       (println results)
13 |       (is (string? results)))))   ; fails if no reply text was extracted
14 |
15 | (deftest summarize-test
16 |   (testing "OpenAI summarize API"
17 |     (let [results
18 |           (openai-api.core/summarize
19 |             some-text)]
20 |       (println results)
21 |       (is (string? results)))))   ; fails if no reply text was extracted
22 |
23 | (deftest question-answering-test
24 |   (testing "OpenAI question-answering API"
25 |     (let [results
26 |           (openai-api.core/answer-question
27 |             "Where is the Valley of Kings?"
28 |             )]
29 |       (println results)
30 |       (is (string? results)))))   ; fails if no reply text was extracted
31 |
--------------------------------------------------------------------------------
/replit.nix:
--------------------------------------------------------------------------------
1 | { pkgs }: {
2 |   deps = [
3 |     pkgs.leiningen
4 |     pkgs.clojure
5 |     pkgs.clojure-lsp
6 |   ];
7 | }
--------------------------------------------------------------------------------
/semantic_web_jena/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | profiles.clj
5 | pom.xml
6 | pom.xml.asc
7 | *.jar
8 | *.class
9 | /.lein-*
10 | /.nrepl-port
11 | /.prepl-port
12 | .hgignore
13 | .hg/
14 |
--------------------------------------------------------------------------------
/semantic_web_jena/README.md:
--------------------------------------------------------------------------------
1 | # semantic_web_jena_clj
2 |
3 | A Clojure library designed to use the Java Jena RDF and SPARQL library.
4 | 5 | ## Usage 6 | 7 | lein test 8 | 9 | ## Code for my book "Practical Artificial Intelligence Programming With Clojure" 10 | 11 | You can read my eBooks for free; see my 12 | website [https://markwatson.com](https://markwatson.com). If you would like to pay me for a copy then please visit [https://leanpub.com/clojureai](https://leanpub.com/clojureai). 13 | 14 | ## License 15 | 16 | Copyright © 2021 Mark Watson 17 | 18 | This program and the accompanying materials are made available under the 19 | terms of the Eclipse Public License 2.0 which is available at 20 | http://www.eclipse.org/legal/epl-2.0. 21 | 22 | This Source Code may also be made available under the following Secondary 23 | Licenses when the conditions for such availability set forth in the Eclipse 24 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 25 | the Free Software Foundation, either version 2 of the License, or (at your 26 | option) any later version, with the GNU Classpath Exception which is available 27 | at https://www.gnu.org/software/classpath/license.html. 
28 | -------------------------------------------------------------------------------- /semantic_web_jena/data/business.sql: -------------------------------------------------------------------------------- 1 | create table customers (id int PRIMARY KEY, name char(30), city char(30), state char(30), country char(30)); 2 | 3 | create table products (id int PRIMARY KEY, name char(40), price float); 4 | 5 | create table orders (id int PRIMARY KEY, product_id int REFERENCES products (id), customer_id int REFERENCES customers (id), number int); 6 | 7 | insert into customers values (1, 'IBM', 'Armonk', 'New York', 'USA'); 8 | insert into customers values (2, 'Oracle', 'Redwood Shores', 'California', 'USA'); 9 | 10 | insert into products values (1, 'KnowledgeBooks NLP Library', 500.0); 11 | insert into products values (2, 'Spider Library', 20.0); 12 | 13 | insert into orders values (1, 1, 1, 1); 14 | insert into orders values (2, 2, 1, 2); 15 | insert into orders values (3, 1, 2, 4); 16 | -------------------------------------------------------------------------------- /semantic_web_jena/data/foaf.nt: -------------------------------------------------------------------------------- 1 | . 2 | . 3 | . 4 | . 5 | . 6 | . 7 | . 8 | "Her Majesty's Secret Service" . 9 | "HMSS" . 10 | . 11 | . 12 | . 13 | . 14 | . 15 | "M" . 16 | . 17 | . 18 | . 19 | . 20 | "Q" . 21 | . 22 | . 23 | . 24 | "James Bond" . 25 | . 26 | . 27 | . 28 | "Darko Kerim" . 29 | . 30 | -------------------------------------------------------------------------------- /semantic_web_jena/data/inferencing_1.txt: -------------------------------------------------------------------------------- 1 | # triples to add to a new repository: 2 | 3 | @prefix rdf: . 4 | @prefix rdfs: . 5 | @prefix kb: . 6 | @prefix person: . 7 | 8 | kb:Sibling rdfs:subClassOf rdfs:Class . 9 | 10 | kb:Brother rdfs:subClassOf kb:Sibling . 11 | 12 | person:mark rdf:type kb:Brother . 
13 | 14 | # sample queries to demonstrate inferencing: 15 | 16 | SELECT DISTINCT ?s 17 | WHERE { ?s rdf:type kb:Brother } 18 | 19 | SELECT DISTINCT ?s 20 | WHERE { ?s rdf:type kb:Sibling } -------------------------------------------------------------------------------- /semantic_web_jena/data/inferencing_2.txt: -------------------------------------------------------------------------------- 1 | # triples to add to a new repository: 2 | 3 | @prefix rdf: . 4 | @prefix rdfs: . 5 | @prefix kb: . 6 | @prefix person: . 7 | 8 | kb:mother rdfs:subPropertyOf kb:parent . 9 | kb:father rdfs:subPropertyOf kb:parent . 10 | 11 | person:ron kb:father person:anthony . 12 | 13 | # sample queries to demonstrate inferencing: 14 | 15 | SELECT DISTINCT ?s ?o 16 | WHERE { ?s kb:father ?o } 17 | 18 | SELECT DISTINCT ?s ?o 19 | WHERE { ?s kb:parent ?o } -------------------------------------------------------------------------------- /semantic_web_jena/data/inferencing_3.txt: -------------------------------------------------------------------------------- 1 | # triples to add to a new repository: 2 | 3 | @prefix rdf: . 4 | @prefix rdfs: . 5 | @prefix kb: . 6 | @prefix person: . 7 | 8 | kb:mother rdfs:domain kb:Female . 9 | kb:father rdfs:domain kb:Male . 10 | 11 | person:kate rdf:type kb:Female . 12 | person:kate kb:father person:bill . 13 | 14 | -------------------------------------------------------------------------------- /semantic_web_jena/data/inferencing_4.txt: -------------------------------------------------------------------------------- 1 | # triples to add to a new repository: 2 | 3 | @prefix rdf: . 4 | @prefix rdfs: . 5 | @prefix kb: . 6 | @prefix person: . 7 | 8 | kb:mother rdfs:domain kb:Female . 9 | kb:father rdfs:domain kb:Male . 10 | 11 | person:mary kb:mother person:susan . 
-------------------------------------------------------------------------------- /semantic_web_jena/data/news.n3: -------------------------------------------------------------------------------- 1 | @prefix kb: . 2 | @prefix rdfs: . 3 | 4 | kb:containsCity rdfs:subPropertyOf kb:containsPlace . 5 | 6 | kb:containsCountry rdfs:subPropertyOf kb:containsPlace . 7 | 8 | kb:containsState rdfs:subPropertyOf kb:containsPlace . 9 | 10 | kb:containsCity "Burlington" , "Denver" , "St. Paul" , "Chicago" , "Quincy" , "CHICAGO" , "Iowa City" ; 11 | kb:containsRegion "U.S. Midwest" , "Midwest" ; 12 | kb:containsCountry "United States" , "Japan" ; 13 | kb:containsState "Minnesota" , "Illinois" , "Mississippi" , "Iowa" ; 14 | kb:containsOrganization "National Guard" , "U.S. Department of Agriculture" , "White House" , "Chicago Board of Trade" , "Department of Transportation" ; 15 | kb:containsPerson "Dena Gray-Fisher" , "Donald Miller" , "Glenn Hollander" , "Rich Feltes" , "George W. Bush" ; 16 | kb:containsIndustryTerm "food inflation" , "food" , "finance ministers" , "oil" . 17 | 18 | kb:containsCity "Washington" , "FLINT" , "Baghdad" , "Arlington" , "Flint" ; 19 | kb:containsCountry "United States" , "Afghanistan" , "Iraq" ; 20 | kb:containsState "Illinois" , "Virginia" , "Arizona" , "Michigan" ; 21 | kb:containsOrganization "White House" , "Obama administration" , "Iraqi government" ; 22 | kb:containsPerson "David Petraeus" , "John McCain" , "Hoshiyar Zebari" , "Barack Obama" , "George W. Bush" , "Carly Fiorina" ; 23 | kb:containsIndustryTerm "oil prices" . 24 | 25 | kb:containsCity "WASHINGTON" ; 26 | kb:containsCountry "United States" , "Pakistan" , "Islamic Republic of Iran" ; 27 | kb:containsState "Maryland" ; 28 | kb:containsOrganization "University of Maryland" , "United Nations" ; 29 | kb:containsPerson "Ban Ki-moon" , "Gordon Brown" , "Hu Jintao" , "George W. Bush" , "Pervez Musharraf" , "Vladimir Putin" , "Steven Kull" , "Mahmoud Ahmadinejad" . 
30 | 31 | kb:containsCity "Sao Paulo" , "Kuala Lumpur" ; 32 | kb:containsRegion "Midwest" ; 33 | kb:containsCountry "United States" , "Britain" , "Saudi Arabia" , "Spain" , "Italy" , "India" , "France" , "Canada" , "Russia" , "Germany" , "China" , "Japan" , "South Korea" ; 34 | kb:containsOrganization "Federal Reserve Bank" , "European Union" , "European Central Bank" , "European Commission" ; 35 | kb:containsPerson "Lee Myung-bak" , "Rajat Nag" , "Luiz Inacio Lula da Silva" , "Jeffrey Lacker" ; 36 | kb:containsCompany "Development Bank Managing" , "Reuters" , "Richmond Federal Reserve Bank" ; 37 | kb:containsIndustryTerm "central bank" , "food" , "energy costs" , "finance ministers" , "crude oil prices" , "oil prices" , "oil shock" , "food prices" , "Finance ministers" , "Oil prices" , "oil" . 38 | -------------------------------------------------------------------------------- /semantic_web_jena/data/news.nt: -------------------------------------------------------------------------------- 1 | @prefix kb: . 2 | @prefix rdfs: . 3 | 4 | kb:containsCity rdfs:subPropertyOf kb:containsPlace . 5 | 6 | kb:containsCountry rdfs:subPropertyOf kb:containsPlace . 7 | 8 | kb:containsState rdfs:subPropertyOf kb:containsPlace . 9 | 10 | 11 | kb:containsCity "Burlington" , "Denver" , 12 | "St. Paul" ," Chicago" , 13 | "Quincy" , "CHICAGO" , 14 | "Iowa City" ; 15 | kb:containsRegion "U.S. Midwest" , "Midwest" ; 16 | kb:containsCountry "United States" , "Japan" ; 17 | kb:containsState "Minnesota" , "Illinois" , 18 | "Mississippi" , "Iowa" ; 19 | kb:containsOrganization "National Guard" , 20 | "U.S. Department of Agriculture" , 21 | "White House" , 22 | "Chicago Board of Trade" , 23 | "Department of Transportation" ; 24 | kb:containsPerson "Dena Gray-Fisher" , 25 | "Donald Miller" , 26 | "Glenn Hollander" , 27 | "Rich Feltes" , 28 | "George W. Bush" ; 29 | kb:containsIndustryTerm "food inflation" , "food" , 30 | "finance ministers" , 31 | "oil" . 
32 | 33 | 34 | kb:containsCity "Washington" , "Baghdad" , 35 | "Arlington" , "Flint" ; 36 | kb:containsCountry "United States" , 37 | "Afghanistan" , 38 | "Iraq" ; 39 | kb:containsState "Illinois" , "Virginia" , 40 | "Arizona" , "Michigan" ; 41 | kb:containsOrganization "White House" , 42 | "Obama administration" , 43 | "Iraqi government" ; 44 | kb:containsPerson "David Petraeus" , 45 | "John McCain" , 46 | "Hoshiyar Zebari" , 47 | "Barack Obama" , 48 | "George W. Bush" , 49 | "Carly Fiorina" ; 50 | kb:containsIndustryTerm "oil prices" . 51 | 52 | 53 | kb:containsCity "WASHINGTON" ; 54 | kb:containsCountry "United States" , "Pakistan" , 55 | "Islamic Republic of Iran" ; 56 | kb:containsState "Maryland" ; 57 | kb:containsOrganization "University of Maryland" , 58 | "United Nations" ; 59 | kb:containsPerson "Ban Ki-moon" , "Gordon Brown" , 60 | "Hu Jintao" , "George W. Bush" , 61 | "Pervez Musharraf" , 62 | "Vladimir Putin" , 63 | "Steven Kull" , 64 | "Mahmoud Ahmadinejad" . 65 | 66 | 67 | kb:containsCity "Sao Paulo" , "Kuala Lumpur" ; 68 | kb:containsRegion "Midwest" ; 69 | kb:containsCountry "United States" , "Britain" , 70 | "Saudi Arabia" , "Spain" , 71 | "Italy" , "India" , 72 | "France" , "Canada" , 73 | "Russia" , "Germany" , "China" , 74 | "Japan" , "South Korea" ; 75 | kb:containsOrganization "Federal Reserve Bank" , 76 | "European Union" , 77 | "European Central Bank" , 78 | "European Commission" ; 79 | kb:containsPerson "Lee Myung-bak" , "Rajat Nag" , 80 | "Luiz Inacio Lula da Silva" , 81 | "Jeffrey Lacker" ; 82 | kb:containsCompany "Development Bank Managing" , 83 | "Reuters" , 84 | "Richmond Federal Reserve Bank" ; 85 | kb:containsIndustryTerm "central bank" , "food" , 86 | "energy costs" , 87 | "finance ministers" , 88 | "crude oil prices" , 89 | "oil prices" , 90 | "oil shock" , 91 | "food prices" , 92 | "Finance ministers" , 93 | "Oil prices" , "oil" .
94 | -------------------------------------------------------------------------------- /semantic_web_jena/data/rdfs_business.n3: -------------------------------------------------------------------------------- 1 | @prefix : . 2 | @prefix owl: . 3 | @prefix rdfs: . 4 | @prefix rdfs_sample_1: . 5 | @prefix xsd: . 6 | 7 | a owl:Ontology . 8 | 9 | :FreshVeggies a :Business . 10 | :HanksHardware a :Business . 11 | :MarkWatsonSoftware a :Business . 12 | :Amazon a :Business . 13 | :MarkWatsonSoftware :Customer :Amazon . 14 | 15 | :Business a owl:Class; 16 | rdfs:label "Business"; 17 | rdfs:subClassOf :Organization . 18 | 19 | :Customer a owl:Class; 20 | rdfs:label "Customer"; 21 | rdfs:subClassOf :Business . 22 | 23 | :Organization a owl:Class; 24 | rdfs:label "Organization" . 25 | 26 | :organizationName a owl:ObjectProperty; 27 | rdfs:domain :Organization; 28 | rdfs:range xsd:stringstring . 29 | 30 | xsd:stringstring a owl:Class . 31 | -------------------------------------------------------------------------------- /semantic_web_jena/data/rdfs_business.nt: -------------------------------------------------------------------------------- 1 | . 2 | 3 | . 4 | 5 | "Business" . 6 | 7 | . 8 | 9 | . 10 | 11 | "Customer" . 12 | 13 | . 14 | 15 | . 16 | 17 | . 18 | 19 | . 20 | 21 | . 22 | 23 | . 24 | 25 | "Organization" . 26 | 27 | . 28 | 29 | . 30 | 31 | . 32 | 33 | . 34 | 35 | -------------------------------------------------------------------------------- /semantic_web_jena/data/rdfs_sample_1.n3: -------------------------------------------------------------------------------- 1 | @prefix : . 2 | @prefix kb: . 3 | @prefix rdf: . 4 | 5 | kb:Business a :Class; 6 | :label "Business"; 7 | :subClassOf kb:Organization . 8 | 9 | kb:Customer a :Class; 10 | :label "Customer"; 11 | :subClassOf kb:Business . 12 | 13 | kb:Organization a :Class; 14 | :label "Organization" . 15 | 16 | kb:organizationName a rdf:Property; 17 | :domain kb:Organization; 18 | :range . 
19 | -------------------------------------------------------------------------------- /semantic_web_jena/data/rdfs_sample_1.owl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /semantic_web_jena/data/rdfs_sample_2.n3: -------------------------------------------------------------------------------- 1 | #Processed by Id: cwm.py,v 1.197 2007/12/13 15:38:39 syosi Exp 2 | # using base file:///Users/markw/Documents/WORK/ruby_scripting_book/src/part2/data/zzzzz.rdf 3 | 4 | # Notation3 generation by 5 | # notation3.py,v 1.200 2007/12/11 21:18:08 syosi Exp 6 | 7 | # Base was: file:///Users/markw/Documents/WORK/ruby_scripting_book/src/part2/data/zzzzz.rdf 8 | @prefix : . 9 | @prefix owl: . 10 | @prefix rdfs: . 11 | @prefix rdfs_sample_1: . 12 | @prefix xsd: . 13 | 14 | a owl:Ontology . 15 | 16 | rdfs_sample_1:FreshVeggies a :Business . 17 | 18 | :Business a owl:Class; 19 | rdfs:label "Business"; 20 | rdfs:subClassOf :Organization . 21 | 22 | :Customer a owl:Class; 23 | rdfs:label "Customer"; 24 | rdfs:subClassOf :Business . 25 | 26 | :Organization a owl:Class; 27 | rdfs:label "Organization" . 28 | 29 | :organizationName a owl:ObjectProperty; 30 | rdfs:domain :Organization; 31 | rdfs:range xsd:stringstring . 32 | 33 | xsd:stringstring a owl:Class . 
34 | 35 | #ENDS 36 | -------------------------------------------------------------------------------- /semantic_web_jena/data/rdfs_sample_2.owl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | ]> 13 | 14 | 15 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | Business 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | Customer 44 | 45 | 46 | 47 | 48 | 49 | Organization 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /semantic_web_jena/data/sample_news.n3: -------------------------------------------------------------------------------- 1 | @prefix kb: . 2 | 3 | kb:oak_creek_flooding kb:storyType kb:disaster ; 4 | kb:summary "Oak Creek flooded last week affecting 5 businesses" ; 5 | kb:title "Oak Creek Flood" . 6 | 7 | kb:bear_mountain_fire kb:storyType kb:disaster ; 8 | kb:summary "The fire on Bear Mountain was caused by lightning" ; 9 | kb:title "Bear Mountain Fire" . 10 | 11 | kb:trout_season kb:storyType kb:sports , kb:recreation ; 12 | kb:summary "Fishing was good the first day of trout season" ; 13 | kb:title "Trout Season Starts" . 14 | 15 | kb:jc_basketball kb:storyType kb:sports ; 16 | kb:summary "Local JC Basketball team lost by 12 points last night" ; 17 | kb:title "Local JC Lost Last Night" . 18 | -------------------------------------------------------------------------------- /semantic_web_jena/data/sample_news.nt: -------------------------------------------------------------------------------- 1 | . 2 | 3 | "Oak Creek flooded last week affecting 5 businesses" . 4 | 5 | . 6 | 7 | "The fire on Bear Mountain was caused by lightening" . 8 | 9 | . 10 | 11 | . 12 | 13 | . 
14 | -------------------------------------------------------------------------------- /semantic_web_jena/data/sparql_ask_test.txt: -------------------------------------------------------------------------------- 1 | PREFIX kb: 2 | ASK 3 | WHERE { ?article_uri ?predicate "Trout Season Starts" } 4 | 5 | PREFIX kb: 6 | ASK 7 | WHERE { ?article_uri kb:copyright ?copyright_value } 8 | -------------------------------------------------------------------------------- /semantic_web_jena/data/sparql_combine_rdfs_test.txt: -------------------------------------------------------------------------------- 1 | @prefix kb: . 2 | @prefix testnews: . 3 | @prefix rdfs: . 4 | 5 | kb:title rdfs:subPropertyOf testnews:title . 6 | testnews:title rdfs:subPropertyOf kb:title . 7 | 8 | kb:oak_creek_flooding kb:storyType kb:disaster ; 9 | kb:summary "Oak Creek flooded last week affecting 5 businesses" ; 10 | kb:title "Oak Creek Flood" . 11 | 12 | PREFIX kb: 13 | PREFIX testnews: 14 | 15 | SELECT DISTINCT ?article_uri1 ?object 16 | WHERE { 17 | ?article_uri1 kb:title ?object . 18 | } 19 | 20 | SELECT DISTINCT ?article_uri1 ?object 21 | WHERE { 22 | ?article_uri1 testnews:title ?object . 23 | } 24 | -------------------------------------------------------------------------------- /semantic_web_jena/data/sparql_combine_test.txt: -------------------------------------------------------------------------------- 1 | @prefix kb: . 2 | @prefix testnews: . 3 | @prefix owl: . 4 | 5 | kb:title owl:sameAs testnews:title . 6 | 7 | kb:oak_creek_flooding kb:storyType kb:disaster ; 8 | kb:summary "Oak Creek flooded last week affecting 5 businesses" ; 9 | kb:title "Oak Creek Flood" . 10 | 11 | PREFIX kb: 12 | PREFIX testnews: 13 | 14 | SELECT DISTINCT ?article_uri1 ?object 15 | WHERE { 16 | ?article_uri1 kb:title ?object . 17 | } 18 | 19 | # works if OWL inferencing is supported: 20 | SELECT DISTINCT ?article_uri1 ?object 21 | WHERE { 22 | ?article_uri1 testnews:title ?object .
23 | } 24 | -------------------------------------------------------------------------------- /semantic_web_jena/data/sparql_construct_test.txt: -------------------------------------------------------------------------------- 1 | 2 | PREFIX kb: 3 | CONSTRUCT { ?story_type kb:subject_of ?article1 . } 4 | WHERE { 5 | { ?article1 ?story_type kb:sports } UNION { ?article1 ?story_type kb:recreation } . 6 | } 7 | 8 | PREFIX kb: 9 | CONSTRUCT { ?story_type kb:subject_of ?article1 . } 10 | WHERE { 11 | { ?article1 ?story_type kb:sports } UNION { ?article1 ?story_type kb:recreation } . 12 | { ?article2 ?story_type kb:sports } UNION { ?article2 ?story_type kb:recreation } . 13 | FILTER (?article1 != ?article2) 14 | } 15 | 16 | PREFIX kb: 17 | CONSTRUCT { ?story_type kb:subject_of ?article1 . } 18 | WHERE { 19 | ?article1 ?story_type ?story_type_value . 20 | ?article2 ?story_type ?story_type_value . 21 | FILTER ((?article1 != ?article2) && ((?story_type_value = kb:sports) || (?story_type_value = kb:recreation))) 22 | } 23 | 24 | PREFIX kb: 25 | CONSTRUCT { ?story_type_value kb:subject_of ?article1 . } 26 | WHERE { 27 | ?article1 ?story_type ?story_type_value . 28 | ?article2 ?story_type ?story_type_value . 29 | FILTER ((?article1 != ?article2) && ((?story_type_value = kb:sports) || (?story_type_value = kb:recreation))) 30 | } 31 | 32 | PREFIX kb: 33 | CONSTRUCT { ?story_type_value kb:subject_of ?article1 . 34 | ?article2 kb:same_topic ?article1 .} 35 | WHERE { 36 | ?article1 ?story_type ?story_type_value . 37 | ?article2 ?story_type ?story_type_value . 
38 | FILTER ((?article1 != ?article2) && ((?story_type_value = kb:sports) || (?story_type_value = kb:recreation))) 39 | } 40 | -------------------------------------------------------------------------------- /semantic_web_jena/data/sparql_describe_test.txt: -------------------------------------------------------------------------------- 1 | PREFIX kb: 2 | DESCRIBE ?article_uri 3 | WHERE { ?article_uri kb:title "Trout Season Starts" } 4 | 5 | PREFIX kb: 6 | DESCRIBE ?article_uri ?predicate 7 | WHERE { ?article_uri ?predicate "Trout Season Starts" } 8 | -------------------------------------------------------------------------------- /semantic_web_jena/data/sparql_select_test.txt: -------------------------------------------------------------------------------- 1 | @prefix kb: . 2 | 3 | kb:oak_creek_flooding kb:storyType kb:disaster ; 4 | kb:summary "Oak Creek flooded last week affecting 5 businesses" ; 5 | kb:title "Oak Creek Flood" . 6 | 7 | kb:bear_mountain_fire kb:storyType kb:disaster ; 8 | kb:summary "The fire on Bear Mountain was caused by lightning" ; 9 | kb:title "Bear Mountain Fire" . 10 | 11 | kb:trout_season kb:storyType kb:sports , kb:recreation ; 12 | kb:summary "Fishing was good the first day of trout season" ; 13 | kb:title "Trout Season Starts" . 14 | 15 | kb:jc_basketball kb:storyType kb:sports ; 16 | kb:summary "Local JC Basketball team lost by 12 points last night" ; 17 | kb:title "Local JC Lost Last Night" . 18 | 19 | 20 | 21 | PREFIX kb: 22 | SELECT DISTINCT ?article_uri1 ?article_uri2 ?predicate1 ?predicate2 23 | WHERE { 24 | ?article_uri1 ?predicate1 ?same_object . 25 | ?article_uri2 ?predicate2 ?same_object . 26 | FILTER (sameTerm(?predicate1, ?predicate2) && !sameTerm(?article_uri1, ?article_uri2)) . 27 | } 28 | 29 | 30 | PREFIX kb: 31 | SELECT DISTINCT ?article_uri1 ?article_uri2 ?predicate1 ?predicate2 32 | WHERE { 33 | ?article_uri1 ?predicate1 "Trout Season Starts" . 34 | ?article_uri2 ?predicate2 "Trout Season Starts" . 
35 | FILTER (!sameTerm(?article_uri1, ?article_uri2)) . 36 | } 37 | 38 | PREFIX kb: 39 | SELECT DISTINCT ?article_uri1 ?article_uri2 ?predicate1 ?predicate2 40 | WHERE { 41 | ?article_uri1 ?predicate1 ?o1 . 42 | ?article_uri2 ?predicate2 ?o2 . 43 | FILTER (!sameTerm(?article_uri1, ?article_uri2) && sameTerm(?predicate1, ?predicate2)) . 44 | FILTER regex(?o1, "Season") . 45 | FILTER regex(?o2, "Season") . 46 | } 47 | 48 | PREFIX kb: 49 | SELECT ?article_uri ?title ?summary 50 | WHERE { 51 | { ?article_uri kb:storyType kb:sports } UNION { ?article_uri kb:storyType kb:recreation } . 52 | ?article_uri kb:title ?title . 53 | ?article_uri kb:summary ?summary . 54 | } 55 | 56 | PREFIX kb: 57 | SELECT DISTINCT ?article_uri ?title ?summary 58 | WHERE { 59 | { ?article_uri kb:storyType kb:sports } UNION { ?article_uri kb:storyType kb:recreation } . 60 | ?article_uri kb:title ?title . 61 | ?article_uri kb:summary ?summary . 62 | } 63 | 64 | 65 | # new triple, without a summary: 66 | kb:jc_bowling kb:storyType kb:sports ; 67 | kb:title "JC Bowling Team to Open Season" . 68 | 69 | PREFIX kb: 70 | SELECT DISTINCT ?title ?summary 71 | WHERE { ?article_uri kb:title ?title . 72 | OPTIONAL { ?article_uri kb:summary ?summary } 73 | } 74 | 75 | PREFIX kb: 76 | SELECT DISTINCT ?title ?summary ?page_count 77 | WHERE { ?article_uri kb:title ?title . 78 | OPTIONAL { ?article_uri kb:summary ?summary } . 79 | OPTIONAL { ?article_uri kb:page_count ?page_count . FILTER (?page_count > 1) } . 
80 | } 81 | 82 | 83 | 84 |
--------------------------------------------------------------------------------
/semantic_web_jena/doc/intro.md:
--------------------------------------------------------------------------------
# Introduction to semantic_web_jena_clj

TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)

--------------------------------------------------------------------------------
/semantic_web_jena/project.clj:
--------------------------------------------------------------------------------
(defproject semantic_web_jena_clj "0.1.0-SNAPSHOT"
  :description "Clojure Wrapper for the Apache Jena RDF and SPARQL library"
  :url "https://markwatson.com"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url "https://www.eclipse.org/legal/epl-2.0/"}
  :source-paths ["src"]
  :java-source-paths ["src-java"]
  :javac-options ["-target" "1.8" "-source" "1.8"]
  :dependencies [[org.clojure/clojure "1.11.1"]
                 ;[com.markwatson/semanticweb "1.0.3-SNAPSHOT"]
                 [org.apache.derby/derby "10.15.2.0"]
                 [org.apache.derby/derbytools "10.15.2.0"]
                 [org.apache.derby/derbyclient "10.15.2.0"]
                 [org.apache.jena/apache-jena-libs "3.17.0" :extension "pom"]]
  :repl-options {:init-ns semantic-web-jena-clj.core})

--------------------------------------------------------------------------------
/semantic_web_jena/src-java/main/java/com/markwatson/semanticweb/Cache.java:
--------------------------------------------------------------------------------
package com.markwatson.semanticweb;

import java.sql.*;

import org.apache.commons.lang3.SerializationUtils;
import org.apache.derby.client.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
 * Persistent key/value cache backed by a Derby database at
 * "./sparqlCache.db". Keys are query strings, values are opaque byte arrays
 * (the caller decides how results are serialized).
 */
public class Cache {
  /**
   * Opens the database (the JDBC URL carries ";create=true") and attempts to
   * create the 'cache' table.
   *
   * @throws SQLException if the connection cannot be opened
   */
  public Cache() throws SQLException, ClassNotFoundException {
    Properties props = new Properties();
    String dbName = "./sparqlCache.db";
    conn = DriverManager.getConnection(protocol + dbName
            + ";create=true", props);
    conn.setAutoCommit(true);
    // try-with-resources: the Statement is released even when CREATE TABLE fails.
    try (Statement s = conn.createStatement()) {
      s.execute("CREATE TABLE cache (query varchar(3000) PRIMARY KEY, result blob)");
      System.out.println("Created table 'cache'");
    } catch (Exception ex) {
      // Deliberate best effort: presumably the table already exists on every
      // run after the first. NOTE(review): any other failure is hidden here too.
      //System.out.println("Error (Cache()): " + ex.getMessage());
      //System.out.println("Table 'cache' already exists");
    }
  }

  /**
   * Stores {@code result} under {@code query} unless an entry for that query
   * already exists. Failures are reported on stdout and otherwise ignored, so
   * a broken cache never breaks the caller.
   *
   * @param query  cache key (the table column is varchar(3000))
   * @param result bytes to store
   */
  public void saveQueryResultInCache (String query, byte [] result) {
    try {
      if (fetchResultFromCache(query) != null) {
        //System.out.println("Query is already in the cache");
        return;
      }
      try (PreparedStatement ps = conn.prepareStatement(
              "insert into cache (query, result) values (?, ?)")) {
        ps.setString(1, query);
        ps.setBytes(2, result);
        ps.executeUpdate();
      }
    } catch (Exception ex) {
      System.out.println("Error (saveQueryResultInCache): " + ex.getMessage());
    }
  }

  /**
   * Looks up the bytes stored under {@code query}.
   *
   * @param query cache key
   * @return the stored bytes, or null when there is no entry or the lookup failed
   */
  public byte [] fetchResultFromCache (String query) {
    try (PreparedStatement ps = conn.prepareStatement(
            "select result from cache where query = ?")) {
      ps.setString(1, query);
      try (ResultSet rs = ps.executeQuery()) {
        if (!rs.next()) return null;
        return rs.getBytes(1);
      }
    } catch (Exception ex) {
      System.out.println("Error (fetchResultFromCache): " + ex.getMessage());
      return null;
    }
  }

  private Connection conn = null;
  private String protocol = "jdbc:derby:";

}

--------------------------------------------------------------------------------
/semantic_web_jena/src-java/main/java/com/markwatson/semanticweb/JenaApis.java:
--------------------------------------------------------------------------------
package com.markwatson.semanticweb;

import
org.apache.commons.lang3.SerializationUtils;
import org.apache.jena.query.*;
import org.apache.jena.rdf.model.*;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.RDFFormat;

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.Blob;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

/**
 * Thin wrapper around an Apache Jena model: load RDF files, save the model,
 * and run SPARQL SELECT queries either against the local model or against a
 * remote SPARQL endpoint (remote results are cached through {@link Cache}).
 */
public class JenaApis {

  public JenaApis() {
    //model = ModelFactory.createDefaultModel(); // use if OWL reasoning not required
    model = ModelFactory.createOntologyModel(); // use OWL reasoner
  }

  /** @return the underlying Jena model */
  public Model model() {
    return model;
  }

  /** Reads the RDF file (or URL) at {@code fpath} into the model. */
  public void loadRdfFile(String fpath) {
    model.read(fpath);
  }

  /**
   * Writes the model to {@code outputPath} as pretty-printed Turtle.
   * (Previously this wrote TriG, which is a dataset format, despite the
   * method name.)
   */
  public void saveModelToTurtleFormat(String outputPath) throws IOException {
    try (FileOutputStream fos = new FileOutputStream(outputPath)) {
      RDFDataMgr.write(fos, model, RDFFormat.TURTLE_PRETTY);
    }
  }

  /**
   * Writes the model to {@code outputPath}. Note that, despite the method
   * name, the output format is N-Triples.
   */
  public void saveModelToN3Format(String outputPath) throws IOException {
    try (FileOutputStream fos = new FileOutputStream(outputPath)) {
      RDFDataMgr.write(fos, model, RDFFormat.NTRIPLES);
    }
  }

  /**
   * Runs a SPARQL SELECT query against the local model.
   *
   * @param sparqlQuery SPARQL SELECT query text
   * @return the result variables and one row of strings per solution
   */
  public QueryResult query(String sparqlQuery) {
    Query query = QueryFactory.create(sparqlQuery);
    try (QueryExecution qexec = QueryExecutionFactory.create(query, model)) {
      return collectRows(qexec.execSelect());
    }
  }

  /**
   * Runs a SPARQL SELECT query against a remote endpoint, consulting the
   * local cache first and storing fresh results in it.
   *
   * @param service     URL of the SPARQL endpoint
   * @param sparqlQuery SPARQL SELECT query text
   * @return the cached or freshly fetched result
   */
  public QueryResult queryRemote(String service, String sparqlQuery) throws SQLException, ClassNotFoundException {
    if (cache == null) cache = new Cache();
    // The endpoint is part of the key so that the same query text sent to two
    // different services does not return the other service's cached answer.
    String cacheKey = service + "\n" + sparqlQuery;
    byte [] b = cache.fetchResultFromCache(cacheKey);
    if (b != null) {
      //System.out.println("Found query in cache.");
      QueryResult l = SerializationUtils.deserialize(b);
      return l;
    }
    // Parse locally first (as the original code did) so a malformed query
    // fails before any network traffic; the parsed Query itself is not used.
    QueryFactory.create(sparqlQuery);
    QueryResult qr;
    try (QueryExecution qexec = QueryExecutionFactory.sparqlService(service, sparqlQuery)) {
      qr = collectRows(qexec.execSelect());
    }
    byte [] b3 = SerializationUtils.serialize(qr);
    cache.saveQueryResultInCache(cacheKey, b3);
    return qr;
  }

  /**
   * Drains a Jena result set into a QueryResult. A variable that is unbound
   * in a solution (e.g. from an OPTIONAL clause) is stored as null instead of
   * triggering a NullPointerException.
   */
  private static QueryResult collectRows(ResultSet results) {
    // Copied into an ArrayList so the variable list is a plain list that can
    // be serialized with the result (assumption: harmless if it already was).
    QueryResult qr = new QueryResult(new ArrayList<>(results.getResultVars()));
    while (results.hasNext()) {
      QuerySolution solution = results.nextSolution();
      List<String> newResultRow = new ArrayList<>();
      for (String var : qr.variableList) {
        RDFNode node = solution.get(var);
        newResultRow.add(node == null ? null : node.toString());
      }
      qr.rows.add(newResultRow);
    }
    return qr;
  }

  private Cache cache = null;
  private Model model;

  public static void main(String[] args) {
    /*
       Execute using, for example:
           mvn exec:java -Dexec.mainClass="com.markwatson.semanticweb.JenaApis" \
                         -Dexec.args="data/news.n3"
     */
    JenaApis ja = new JenaApis();
    System.out.println(args.length);
    if (args.length == 0) {
      // no RDF input file names on command line so use a default file:
      ja.loadRdfFile("data/news.n3");
    } else {
      for (String fpath : args) {
        ja.loadRdfFile(fpath);
      }
    }
    System.out.println("Multi-line queries are OK but don't use blank lines.");
    System.out.println("Enter a blank line to process query.");
    // One Scanner for the whole session: a new Scanner per query can lose
    // input that the previous one had already buffered.
    Scanner sc = new Scanner(System.in);
    while (true) {
      System.out.println("Enter a SPARQL query:");
      if (!sc.hasNextLine()) return; // end of input: stop instead of running empty queries
      StringBuilder sb = new StringBuilder();
      while (sc.hasNextLine()) { //until no other inputs to proceed
        String s = sc.nextLine();
        if (s.equals("quit") || s.equals("QUIT") || s.equals("exit") || s.equals("EXIT"))
          System.exit(0);
        if (s.length() < 1) break;
        sb.append(s);
        sb.append("\n");
      }
      if (sb.length() == 0) continue; // nothing typed: prompt again
      try {
        QueryResult qr = ja.query(sb.toString());
        System.out.println(qr);
      } catch (QueryException ex) {
        // keep the interactive loop alive after a malformed or non-SELECT query
        System.out.println("Error: " + ex.getMessage());
      }
    }
  }
}

--------------------------------------------------------------------------------
/semantic_web_jena/src-java/main/java/com/markwatson/semanticweb/QueryResult.java:
--------------------------------------------------------------------------------
package com.markwatson.semanticweb;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
 * Serializable holder for a SPARQL SELECT result: the list of result
 * variable names plus one list of string values per result row.
 */
public class QueryResult implements Serializable {
  private QueryResult() { }
  public QueryResult(List<String> variableList) {
    this.variableList = variableList;
  }
  // names of the result variables, in column order
  public List<String> variableList;
  // one entry per result row; each row holds one value per variable
  public List<List<String>> rows = new ArrayList<>();
  public String toString() {
    StringBuilder sb = new StringBuilder("[QueryResult vars:" + variableList + "\nRows:\n");
    for (List<String> row : rows) {
      sb.append(" " + row + "\n");
    }
    return sb.toString();
  }
}

--------------------------------------------------------------------------------
/semantic_web_jena/src-java/main/resources/log4j.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
--------------------------------------------------------------------------------
/semantic_web_jena/src/semantic_web_jena_clj/core.clj:
--------------------------------------------------------------------------------
(ns semantic-web-jena-clj.core
  (:import (com.markwatson.semanticweb JenaApis Cache QueryResult)))

(defn- get-jena-api-model "get a default model with OWL reasoning" []
  (new JenaApis))

(defonce model (get-jena-api-model))

(defn- results->clj [results]
  (let [variable-list (seq (.
results variableList))
        bindings-list (seq (map seq (. results rows)))]
    (cons variable-list bindings-list)))

;; Load an RDF file into the shared model.
(defn load-rdf-file [fpath]
  (. model loadRdfFile fpath))

;; Returns a seq whose first element is the list of result variables and
;; whose remaining elements are the result rows.
(defn query "SPARQL query" [sparql-query]
  (results->clj (. model query sparql-query)))

;; Same result shape as `query`, but runs against a remote SPARQL endpoint.
(defn query-remote "remote service like DBPedia, etc." [remote-service sparql-query]
  (results->clj (. model queryRemote remote-service sparql-query)))

(defn query-dbpedia [sparql-query]
  (query-remote "https://dbpedia.org/sparql" sparql-query))

(defn query-wikidata [sparql-query]
  (query-remote "https://query.wikidata.org/bigdata/namespace/wdq/sparql" sparql-query))

--------------------------------------------------------------------------------
/semantic_web_jena/test/semantic_web_jena_clj/core_test.clj:
--------------------------------------------------------------------------------
(ns semantic-web-jena-clj.core-test
  (:require [clojure.pprint :as pprint]
            [clojure.test :refer :all]
            [semantic-web-jena-clj.core :refer :all]))

(deftest load-data-and-sample-queries
  (testing "Load local triples files and make some SPARQL queries"
    (load-rdf-file "data/sample_news.nt")
    (let [results (query "select * { ?s ?p ?o } limit 5")]
      (pprint/pprint results)
      ;; 1 header row (the variable list) + 5 result rows
      (is (= (count results) 6)))))

(deftest dbpedia-test
  (testing "Try SPARQL query to DBPedia endpoint"
    (pprint/pprint
      (query-dbpedia
        "select ?p where { ?p . } limit 10"))))

;; This test previously called query-dbpedia, so the WikiData endpoint was
;; never exercised.
(deftest wikidata-test
  (testing "Try SPARQL query to WikiData endpoint"
    (pprint/pprint
      (query-wikidata
        "select * where { ?subject ?property ?object .
} limit 10")))) 24 | -------------------------------------------------------------------------------- /simple_rdf_sparql/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | .idea 5 | .lsp 6 | profiles.clj 7 | pom.xml 8 | pom.xml.asc 9 | *.jar 10 | *.class 11 | /.lein-* 12 | /.nrepl-port 13 | /.prepl-port 14 | .hgignore 15 | .hg/ 16 | -------------------------------------------------------------------------------- /simple_rdf_sparql/README.md: -------------------------------------------------------------------------------- 1 | # simple_rdf_sparql 2 | 3 | TBD 4 | 5 | ## License 6 | 7 | Copyright © 2021 Mark Watson 8 | 9 | This program and the accompanying materials are made available under the 10 | terms of the Eclipse Public License 2.0 which is available at 11 | http://www.eclipse.org/legal/epl-2.0. 12 | 13 | This Source Code may also be made available under the following Secondary 14 | Licenses when the conditions for such availability set forth in the Eclipse 15 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 16 | the Free Software Foundation, either version 2 of the License, or (at your 17 | option) any later version, with the GNU Classpath Exception which is available 18 | at https://www.gnu.org/software/classpath/license.html. 
19 |
--------------------------------------------------------------------------------
/simple_rdf_sparql/doc/intro.md:
--------------------------------------------------------------------------------
# Introduction to Simple RDF SPARQL


--------------------------------------------------------------------------------
/simple_rdf_sparql/project.clj:
--------------------------------------------------------------------------------
(defproject simple_rdf_sparql "0.1.0-SNAPSHOT"
  :description "FIXME: write description"
  :url "http://example.com/FIXME"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url "https://www.eclipse.org/legal/epl-2.0/"}
  :dependencies [[org.clojure/clojure "1.10.1"]]
  :repl-options {:init-ns simple-rdf-sparql.core}
  :main simple-rdf-sparql.core)

--------------------------------------------------------------------------------
/simple_rdf_sparql/src/simple_rdf_sparql/core.clj:
--------------------------------------------------------------------------------
;; Simple RDF SPARQL. Copyright 2024 Mark Watson. All rights reserved.
;; GNU AFFERO GENERAL PUBLIC LICENSE Version 3

(ns simple-rdf-sparql.core
  ;;(:require [clojure.pprint :refer [pprint]])
  (:require [clojure.string :as str]))

;; RDF triple structure
(defrecord Triple [subject predicate object])

;; RDF datastore: an atom holding a vector of Triple records in insertion order
(def ^:dynamic *rdf-store* (atom []))

;; Add a triple to the datastore
(defn add-triple [subject predicate object]
  (swap! *rdf-store* conj (->Triple subject predicate object)))

;; Remove every triple equal to subject/predicate/object from the datastore.
;; The result is forced back into a vector: `remove` alone returns a lazy seq,
;; and `conj` on a seq prepends, which would reverse the order of triples
;; added afterwards.
(defn remove-triple [subject predicate object]
  (swap! *rdf-store*
         (fn [store]
           (vec (remove #(and (= (:subject %) subject)
                              (= (:predicate %) predicate)
                              (= (:object %) object))
                        store)))))

;; Helper function to check if a string is a variable (a string starting with "?").
;; (first "") is nil, so the empty string is handled without a separate check.
(defn variable? [s]
  (and (string? s) (= (first s) \?)))

;; Convert triple to binding: a map from each variable in `pattern`
;; (a [subject predicate object] sequence) to the matching field of `triple`.
(defn triple-to-binding [triple pattern]
  (into {}
        (filter second
                (map (fn [field pattern-item]
                       (when (variable? pattern-item)
                         [pattern-item (field triple)]))
                     [:subject :predicate :object]
                     pattern))))

;; Return the stored triples matching the given terms; a nil or a variable
;; term matches anything in that position.
(defn query-triples [subject predicate object]
  (filter (fn [triple]
            (and (or (nil? subject) (variable? subject) (= (:subject triple) subject))
                 (or (nil? predicate) (variable? predicate) (= (:predicate triple) predicate))
                 (or (nil? object) (variable? object) (= (:object triple) object))))
          @*rdf-store*))

;; Print all triples in the datastore
(defn print-all-triples []
  (println "All triples in the datastore:")
  (doseq [triple @*rdf-store*]
    (println (str (:subject triple) " " (:predicate triple) " " (:object triple))))
  (println))

;; SPARQL query structure
(defrecord SPARQLQuery [select-vars where-patterns])

;; Apply bindings to a pattern: bound variables are replaced by their values,
;; unbound variables and constants are left as they are.
(defn apply-bindings [pattern bindings]
  (mapv (fn [item]
          (if (variable? item)
            (get bindings item item)
            item))
        pattern))

;; Merge bindings (entries of binding2 win on conflict)
(defn merge-bindings [binding1 binding2]
  (merge binding1 binding2))

;; Split the tokens of a WHERE clause into patterns, using "." as the separator.
(defn parse-where-patterns [where-clause]
  (loop [tokens where-clause
         current-pattern []
         patterns []]
    (cond
      (empty? tokens)
      (if (empty? current-pattern)
        patterns
        (conj patterns current-pattern))

      (= (first tokens) ".")
      (recur (rest tokens)
             []
             (if (empty? current-pattern)
               patterns
               (conj patterns current-pattern)))

      :else
      (recur (rest tokens)
             (conj current-pattern (first tokens))
             patterns))))

;; Parse "select <vars> where { <patterns> }" into a SPARQLQuery record.
;; Tokens are whitespace separated, so braces must be surrounded by spaces.
;; The keywords are matched case-insensitively; a query without a "select"
;; that precedes a "where" is rejected with an explanatory exception.
(defn parse-sparql-query [query-string]
  (let [tokens (vec (remove #{"{" "}"} (str/split query-string #"\s+")))
        keywords (mapv str/lower-case tokens)
        select-index (.indexOf keywords "select")
        where-index (.indexOf keywords "where")]
    (when (or (neg? select-index) (< where-index select-index))
      (throw (ex-info "Query must have the form: select <vars> where { <patterns> }"
                      {:query query-string})))
    (let [select-vars (subvec tokens (inc select-index) where-index)
          where-clause (subvec tokens (inc where-index))
          where-patterns (parse-where-patterns where-clause)]
      (->SPARQLQuery select-vars where-patterns))))

(defn remove-duplicate-bindings [bindings]
  (into {} bindings))

;; Keep only the selected variables of every result ("*" keeps everything).
(defn project-results [results select-vars]
  (if (= select-vars ["*"])
    (map remove-duplicate-bindings results)
    (map (fn [result]
           (remove-duplicate-bindings
            (select-keys result select-vars)))
         results)))

;; Execute WHERE patterns with bindings: match the first pattern (with the
;; current bindings substituted) against the store, then recurse into the
;; remaining patterns once per match.
(defn execute-where-patterns-with-bindings [patterns bindings]
  (if (empty? patterns)
    [bindings]
    (let [pattern (first patterns)
          remaining-patterns (rest patterns)
          bound-pattern (apply-bindings pattern bindings)
          matching-triples (apply query-triples bound-pattern)
          new-bindings (map #(merge-bindings bindings (triple-to-binding % pattern))
                            matching-triples)]
      (if (empty? remaining-patterns)
        new-bindings
        (mapcat #(execute-where-patterns-with-bindings remaining-patterns %)
                new-bindings)))))

;; Execute WHERE patterns starting from no bindings at all.
(defn execute-where-patterns [patterns]
  (execute-where-patterns-with-bindings patterns {}))

;; Parse and run a query; returns a sequence of binding maps.
(defn execute-sparql-query [query-string]
  (let [query (parse-sparql-query query-string)
        where-patterns (:where-patterns query)
        select-vars (:select-vars query)
        results (execute-where-patterns where-patterns)
        projected-results (project-results results select-vars)]
    projected-results))

(defn print-query-results [query-string]
  (println "Query:" query-string)
  (let [results (execute-sparql-query query-string)]
    (println "Final Results:")
    (if (empty? results)
      (println " No results")
      (doseq [result results]
        (println " " (str/join ", " (map (fn [[k v]] (str k ": " v)) result)))))
    (println)))

(defn -main []
  (reset! *rdf-store* [])

  (add-triple "John" "age" "30")
  (add-triple "John" "likes" "pizza")
  (add-triple "Mary" "age" "25")
  (add-triple "Mary" "likes" "sushi")
  (add-triple "Bob" "age" "35")
  (add-triple "Bob" "likes" "burger")

  (print-all-triples)

  (print-query-results "select * where { ?name age ?age . ?name likes ?food }")
  (print-query-results "select ?s ?o where { ?s likes ?o }")
  (print-query-results "select * where { ?name age ?age .
?name likes pizza }")) 173 | -------------------------------------------------------------------------------- /simple_rdf_sparql/test/simple_rdf_sparql/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns simple-rdf-sparql.core-test 2 | (:require [clojure.test :refer :all] 3 | [simple-rdf-sparql :refer :all])) 4 | 5 | 6 | (deftest search-test 7 | (testing "Simple RDF SPARQL" 8 | (is (= 0 0)))) 9 | -------------------------------------------------------------------------------- /webscraping/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | -------------------------------------------------------------------------------- /webscraping/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 3 | 4 | ## [Unreleased] 5 | ### Changed 6 | - Add a new arity to `make-widget-async` to provide a different widget shape. 7 | 8 | ## [0.1.1] - 2021-05-13 9 | ### Changed 10 | - Documentation on how to make the widgets. 11 | 12 | ### Removed 13 | - `make-widget-sync` - we're all async, all the time. 14 | 15 | ### Fixed 16 | - Fixed widget maker to keep working when daylight savings switches over. 17 | 18 | ## 0.1.0 - 2021-05-13 19 | ### Added 20 | - Files from the new template. 21 | - Widget maker public API - `make-widget-sync`. 
22 | 23 | [Unreleased]: https://sourcehost.site/your-name/webscraping/compare/0.1.1...HEAD 24 | [0.1.1]: https://sourcehost.site/your-name/webscraping/compare/0.1.0...0.1.1 25 | -------------------------------------------------------------------------------- /webscraping/README.md: -------------------------------------------------------------------------------- 1 | # webscraping 2 | 3 | A Clojure library using the Java jsoup library. 4 | 5 | ## Usage 6 | 7 | Run tests: 8 | 9 | lein test 10 | 11 | Look at the tests for API usage. 12 | 13 | Run the main function in core.clj, which contains a simple example: 14 | 15 | lein run 16 | 17 | ## Code for my book "Practical Artificial Intelligence Programming With Clojure" 18 | 19 | You can read the book that uses this example code for free online: [https://leanpub.com/clojureai/read](https://leanpub.com/clojureai/read). 20 | 21 | If you would like to pay me for a copy of this book then please visit [https://leanpub.com/clojureai](https://leanpub.com/clojureai). 22 | 23 | Please visit my website [https://markwatson.com](https://markwatson.com). 24 | 25 | 26 | ## License 27 | 28 | Copyright © 2021 Mark Watson 29 | 30 | This program and the accompanying materials are made available under the 31 | terms of the Eclipse Public License 2.0 which is available at 32 | http://www.eclipse.org/legal/epl-2.0. 33 | 34 | This Source Code may also be made available under the following Secondary 35 | Licenses when the conditions for such availability set forth in the Eclipse 36 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 37 | the Free Software Foundation, either version 2 of the License, or (at your 38 | option) any later version, with the GNU Classpath Exception which is available 39 | at https://www.gnu.org/software/classpath/license.html. 
40 | 
--------------------------------------------------------------------------------
/webscraping/project.clj:
--------------------------------------------------------------------------------
1 | (defproject webscraping "0.1.0-SNAPSHOT"
2 |   :description "Demonstration of using Java Jsoup library"
3 |   :url "http://markwatson.com"
4 |   :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
5 |             :url "https://www.eclipse.org/legal/epl-2.0/"}
6 |   :dependencies [[org.clojure/clojure "1.11.1"]
7 |                  [org.jsoup/jsoup "1.17.2"]]
8 |   :main webscraping.core
9 |   :repl-options {:init-ns webscraping.core})
10 | 
--------------------------------------------------------------------------------
/webscraping/src/webscraping/core.clj:
--------------------------------------------------------------------------------
1 | (ns webscraping.core
2 |   (:require [clojure.string :as str])
3 |   (:import (org.jsoup Jsoup)))
4 | 
5 | ;; Defaults used by fetch-web-page-data when the caller passes no options.
6 | (def ^:private default-user-agent
7 |   "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.0; rv:77.0) Gecko/20100101 Firefox/77.0")
8 | 
9 | (def ^:private default-timeout-ms 20000)
10 | 
11 | (defn get-html-anchors
12 |   "Return a vector of {:text ... :uri ...} maps, one per element matching
13 |   the selector a[href] in the parsed Jsoup document. :text is the trimmed
14 |   anchor text and :uri is the result of absUrl on the href attribute.
15 |   An anchor that throws while being read is reported on *err* and skipped."
16 |   [jsoup-web-page-contents]
17 |   (->> (.select jsoup-web-page-contents "a[href]")
18 |        (keep (fn [anchor]
19 |                (try
20 |                  {:text (str/trim (.text anchor))
21 |                   :uri (.absUrl anchor "href")}
22 |                  (catch Exception e
23 |                    (binding [*out* *err*]
24 |                      (println (str "Error processing anchor: " (.getMessage e)
25 |                                    " on page: " (.title jsoup-web-page-contents))))
26 |                    ;; nil is dropped by `keep`, so bad anchors are skipped.
27 |                    nil))))
28 |        vec))
29 | 
30 | (defn fetch-web-page-data
31 |   "Get the anchor data and full text from a web URI.
32 |   Returns {:page-text <all text of the page> :anchors <vector of anchor maps>}.
33 |   The optional second argument is a map with :user-agent and :timeout-ms
34 |   (milliseconds); both default to the values this function has always used."
35 |   ([a-uri]
36 |    (fetch-web-page-data a-uri {}))
37 |   ([a-uri {:keys [user-agent timeout-ms]
38 |            :or {user-agent default-user-agent
39 |                 timeout-ms default-timeout-ms}}]
40 |    (let [doc (-> (Jsoup/connect a-uri)
41 |                  (.userAgent user-agent)
42 |                  (.timeout (int timeout-ms))
43 |                  (.get))]
44 |      {:page-text (.text doc)
45 |       :anchors (get-html-anchors doc)})))
46 | 
47 | (defn -main
48 |   "Fetch a page and print its first five anchors. The page defaults to
49 |   https://markwatson.com; pass a URL as the first argument to override it."
50 |   [& args]
51 |   (let [uri (or (first args) "https://markwatson.com")]
52 |     (println (str "Fetching data from " uri "..."))
53 |     (let [page-data (fetch-web-page-data uri)
54 |           anchors (:anchors page-data)]
55 |       (if (seq anchors)
56 |         (do
57 |           (println (str "\nFound " (count anchors) " anchors:"))
58 |           (println "\nFirst 5 anchors:")
59 |           (doseq [anchor (take 5 anchors)]
60 |             (println (str " Text: " (:text anchor)))
61 |             (println (str " URI: " (:uri anchor)))
62 |             (println "----")))
63 |         (println "\nNo anchors found on the page."))
64 |       (println "\nFetching complete."))))
65 | 
--------------------------------------------------------------------------------
/webscraping/test/webscraping/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns webscraping.core-test
2 |   (:require [clojure.test :refer :all]
3 |             [clojure.pprint :as pp]
4 |             [clojure.string :as str] ;; Added for string checks
5 |             [webscraping.core :refer :all])
6 |   (:import (org.jsoup Jsoup))) ;; Added Jsoup import
7 | 
8 | ;; NOTE: this test needs network access and depends on the live content of
9 | ;; https://markwatson.com.
10 | (deftest mark-watson-website-test
11 |   (testing "Fetch Mark Watson's website and verify basic anchor extraction"
12 |     (let [page-data (fetch-web-page-data "https://markwatson.com")]
13 |       (is (string? (:page-text page-data)))
14 |       (is (not (str/blank? (:page-text page-data))))
15 |       (is (vector? (:anchors page-data)))
16 |       (is (pos? (count (:anchors page-data))))
17 |       (let [anchors (:anchors page-data)]
18 |         (is (some #(str/includes? (:text %) "Read My Blog on Blogspot") anchors)
19 |             "Expected to find an anchor with 'Read My Blog on Blogspot' in its text")
20 |         (is (some #(= (:uri %) "https://mark-watson.blogspot.com/") anchors)
21 |             "Expected to find an anchor linking to https://mark-watson.blogspot.com/")
22 | 
23 |         (is (some #(str/includes? (str/lower-case (:text %)) "clojure") anchors)
24 |             "Expected to find an anchor with 'Clojure' (case-insensitive) in its text")
25 |         (is (some #(= (:uri %) "https://leanpub.com/clojureai") anchors)
26 |             "Expected to find an anchor linking to Leanpub Clojure AI book")
27 | 
28 |         ;; Add a check for one more reasonably stable link, e.g., "My Books"
29 |         (is (some #(and (str/includes? (:text %) "My Books")
30 |                         (= (:uri %) "https://markwatson.com#books"))
31 |                   anchors)
32 |             "Expected to find an anchor 'My Books' linking to '#books'")))))
33 | 
34 | (deftest no-anchors-test
35 |   (testing "Page with no anchor tags"
36 |     ;; NOTE(review): the wrapper markup of this fixture is a reconstruction;
37 |     ;; only the text "No links here." is known. Any anchor-free HTML works.
38 |     (let [html-doc (Jsoup/parse "<html><body><p>No links here.</p></body></html>")
39 |           anchors (get-html-anchors html-doc)]
40 |       (is (empty? anchors) "Expected no anchors from HTML with no links"))))
41 | 
42 | (deftest relative-and-absolute-uris-test
43 |   (testing "Anchor URI resolution for relative and absolute paths"
44 |     ;; NOTE(review): the anchor markup is reconstructed from the assertion
45 |     ;; messages below, which name each href; the wrapper tags are a guess.
46 |     (let [base-uri "http://example.com/docs/"
47 |           html-content (str "<html><body>"
48 |                             "<a href=\"/page1\">Page 1</a> "
49 |                             "<a href=\"http://domain.com/page2\">Page 2</a> "
50 |                             "<a href=\"../page3\">Page 3</a> "
51 |                             "<a href=\"sub/page4\">Page 4</a>"
52 |                             "<a href=\"page5.html\">Page 5</a>"
53 |                             "</body></html>")
54 |           html-doc (Jsoup/parse html-content base-uri)
55 |           anchors (get-html-anchors html-doc)
56 |           uris (set (map :uri anchors))]
57 |       (is (contains? uris "http://example.com/page1") "Relative /page1 should resolve to http://example.com/page1")
58 |       (is (contains? uris "http://domain.com/page2") "Absolute http://domain.com/page2 should remain unchanged")
59 |       (is (contains? uris "http://example.com/page3") "Relative ../page3 should resolve to http://example.com/page3")
60 |       (is (contains? uris "http://example.com/docs/sub/page4") "Relative sub/page4 should resolve to http://example.com/docs/sub/page4")
61 |       (is (contains? uris "http://example.com/docs/page5.html") "Relative page5.html should resolve to http://example.com/docs/page5.html"))))
62 | 
--------------------------------------------------------------------------------