;; Repository layout:
;;
;; ├── doc
;; │   └── intro.md
;; ├── .gitignore
;; ├── src
;; │   ├── clj_deepwalk
;; │   │   └── core.clj
;; │   ├── graphs
;; │   │   └── core.clj
;; │   └── model
;; │       └── core.clj
;; ├── project.clj
;; ├── test
;; │   └── clj_deepwalk
;; │       └── core_test.clj
;; ├── CHANGELOG.md
;; ├── LICENSE
;; └── README.md
;;
;; --------------------------------------------------------------------------------
;; /doc/intro.md:
;; --------------------------------------------------------------------------------
;; # Introduction to clj-deepwalk
;;
;; TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
;;
;; --------------------------------------------------------------------------------
;; /.gitignore:
;; --------------------------------------------------------------------------------
;; /target
;; /classes
;; /checkouts
;; profiles.clj
;; pom.xml
;; pom.xml.asc
;; *.jar
;; *.class
;; /.lein-*
;; /.nrepl-port
;; /.prepl-port
;; .hgignore
;; .hg/
;; /.clj-kondo
;; /.lsp
;;
;; --------------------------------------------------------------------------------
;; /src/clj_deepwalk/core.clj:
;; --------------------------------------------------------------------------------
(ns clj-deepwalk.core
  (:require [graphs.core :as gc]
            [model.core :as mc]))


(defn deepwalk
  "Public entry point: builds an adjacency map from `edges` and trains node
  vectors on it.

  `edges` is a sequence of two-element edges. `opts` are alternating
  keyword/value pairs (the recognised keys are listed in the :opts metadata
  below); they are packed into a map and handed to `mc/train-deepwalk`.

  Returns a map with the adjacency map under :graph and the trained vectors
  under :vectors."
  {:opts [:vector-size
          :walk-length
          :num-walks
          :window-size
          :learning-rate]}
  [edges & opts]
  (let [adjacency (gc/create-graph edges)
        ;; An odd number of opts makes hash-map throw, exactly as before.
        settings  (apply hash-map opts)
        embedding (mc/train-deepwalk adjacency settings)]
    {:graph   adjacency
     :vectors embedding}))

;; --------------------------------------------------------------------------------
;; /project.clj:
;; --------------------------------------------------------------------------------
;; NOTE(review): :main names clj-deepwalk.core, but no -main is visible in that
;; namespace -- confirm whether `lein run` is meant to work.
;; NOTE(review): the :license entry is EPL/GPL while the LICENSE file in this
;; repository is MIT -- confirm which one is intended.
(defproject clj-deepwalk "0.1.0-SNAPSHOT"
  :description "A lightweight implementation of the DeepWalk
  algorithm for graph embedding in Clojure"
  :url "http://example.com/FIXME"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url "https://www.eclipse.org/legal/epl-2.0/"}
  :dependencies [[org.clojure/clojure "1.11.1"]]
  :main ^:skip-aot clj-deepwalk.core
  :target-path "target/%s"
  :profiles {:uberjar {:aot :all
                       :jvm-opts ["-Dclojure.compiler.direct-linking=true"]}})

;; --------------------------------------------------------------------------------
;; /test/clj_deepwalk/core_test.clj:
;; --------------------------------------------------------------------------------
(ns clj-deepwalk.core-test
  (:require [clojure.test :refer :all]
            [graphs.core :as gc]))


(deftest test-create-graph
  ;; Each row pairs an edge list with the undirected adjacency map that
  ;; gc/create-graph is expected to build from it.
  (testing "create-graph"
    (are [edges adjacency] (= adjacency (gc/create-graph edges))
      [[:a :b] [:a :c] [:b :d]] {:a #{:b :c}
                                 :b #{:a :d}
                                 :c #{:a}
                                 :d #{:b}}
      [[:a :b] [:c :d]]         {:a #{:b}
                                 :b #{:a}
                                 :c #{:d}
                                 :d #{:c}})))

;; --------------------------------------------------------------------------------
;; /CHANGELOG.md:
;; --------------------------------------------------------------------------------
;; # Change Log
;; All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/).
;;
;; ## [Unreleased]
;; ### Changed
;; - Add a new arity to `make-widget-async` to provide a different widget shape.
;;
;; ## [0.1.1] - 2024-07-21
;; ### Changed
;; - Documentation on how to make the widgets.
;;
;; ### Removed
;; - `make-widget-sync` - we're all async, all the time.
;;
;; ### Fixed
;; - Fixed widget maker to keep working when daylight savings switches over.
;;
;; ## 0.1.0 - 2024-07-21
;; ### Added
;; - Files from the new template.
;; - Widget maker public API - `make-widget-sync`.
22 | 23 | [Unreleased]: https://sourcehost.site/your-name/clj-deepwalk/compare/0.1.1...HEAD 24 | [0.1.1]: https://sourcehost.site/your-name/clj-deepwalk/compare/0.1.0...0.1.1 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Mihir Deshpande 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
;; (end of /LICENSE)
;; --------------------------------------------------------------------------------
;; /src/graphs/core.clj:
;; --------------------------------------------------------------------------------
(ns graphs.core)

;; Graph representation
(defn create-graph
  "Create an undirected graph from a sequence of edges.

  Each edge is a two-element sequence [u v]. The result is a map from node to
  the set of its neighbors; every edge is recorded in both directions."
  [edges]
  (reduce (fn [graph [u v]]
            (-> graph
                ;; fnil supplies an empty set the first time a node is seen.
                (update u (fnil conj #{}) v)
                (update v (fnil conj #{}) u)))
          {}
          edges))


(defn get-neighbors
  "Get the neighbor set of `node` in `graph`, or nil when the node is absent."
  [graph node]
  (get graph node))


(defn create-vocab
  "Indexes graph nodes. Creates a map whose keys are the nodes and whose values
  are incrementing integers starting at 0, assigned in the order returned by
  `(keys graph)`. As each graph node is analogous to a word in NLP terms, this
  function essentially creates a vocab - an indexed collection of words."
  [graph]
  (zipmap (keys graph) (range)))


;; Random walk generation
(defn random-walk
  "Generate a random walk of at most `walk-length` nodes starting from
  `start-node`.

  Returns a vector whose first element is `start-node`; each following element
  is picked uniformly at random from the neighbors of the previous one.

  Edge cases:
  - a non-positive `walk-length` yields an empty vector (previously this
    looped forever because the step counter started below zero);
  - when the current node has no neighbors (or is not in `graph`) the walk
    stops early instead of being padded with nil."
  [graph start-node walk-length]
  (if-not (pos? walk-length)
    []
    (loop [walk [start-node]
           current-node start-node
           steps-left (dec walk-length)]
      (let [neighbors (seq (get-neighbors graph current-node))]
        ;; pos? (rather than zero?) also terminates for non-integer lengths.
        (if (and (pos? steps-left) neighbors)
          (let [next-node (rand-nth neighbors)]
            (recur (conj walk next-node)
                   next-node
                   (dec steps-left)))
          walk)))))

;; --------------------------------------------------------------------------------
;; /README.md:
;; --------------------------------------------------------------------------------
;; # DeepWalk Clojure Library
;;
;; ## Overview
;;
;; This library implements the DeepWalk algorithm for learning continuous feature
;; representations for nodes in networks. DeepWalk uses short random walks to learn
;; representations of vertices in a graph, which can be used for various downstream
;; machine learning tasks such as node classification, link prediction, and
;; community detection.
10 | 11 | More on this here - [Deepwalking in clojure](https://www.mihirdeshpande.com/posts/deepwalk_clojure) 12 | 13 | ## Usage 14 | 15 | ```clojure 16 | (require '[clj-deepwalk.core :as dw]) 17 | 18 | (def edges [[0 1] [1 2] [2 3] [3 0] [0 2] [1 3]]) 19 | (def result (dw/deepwalk edges)) 20 | ``` 21 | 22 | ### Using custom opts 23 | ```clojure 24 | (def result (dw/deepwalk edges 25 | :vector-size 128 26 | :walk-length 20 27 | :num-walks 100 28 | :window-size 10 29 | :learning-rate 0.025)) 30 | ``` 31 | 32 | ### Sample output 33 | ```clojure 34 | => (def edges [[:a :b] [:b :c] [:b :d]]) 35 | 36 | => (dw/deepwalk edges :vector-size 5) 37 | 38 | => {:graph {:a #{:b}, :b #{:c :d :a}, :c #{:b}, :d #{:b}}, 39 | :vectors 40 | [[1.2962443195668192 41 | 0.6028800604250182 42 | 0.651273176448106 43 | 1.2017765618740872 44 | 1.435104748831545] 45 | [1.5642613286885012 46 | 0.7275341153860508 47 | 0.7859331986660882 48 | 1.45026101413665 49 | 1.731833133096857] 50 | [1.280047368643186 51 | 0.5953468981930664 52 | 0.6431353281138268 53 | 1.1867600131417053 54 | 1.4171726963347773] 55 | [1.2820077614684258 56 | 0.5962586720979554 57 | 0.6441202900095928 58 | 1.1885775363615692 59 | 1.4193430966293958]]} 60 | ``` 61 | 62 | ## API Reference 63 | 64 | edges: A sequence of edges representing the graph. Each edge is a vector of two node identifiers. 
;; (README.md, continued)
;;
;; opts: Optional parameters (key-value pairs)
;;
;; - `:vector-size` (default: 64): Dimensionality of the feature representations
;; - `:walk-length` (default: 10): Length of each random walk
;; - `:num-walks` (default: 10): Number of random walks per node
;; - `:window-size` (default: 5): Maximum distance between the current and predicted node in the skip-gram model
;; - `:learning-rate` (default: 0.025): Learning rate for the skip-gram model
;;
;; Returns a map containing:
;;
;; - `:graph` The input graph represented as an adjacency list
;; - `:vectors` The learned feature representations for each node
;;
;;
;; ## Limitations and Future Work
;;
;; This implementation is a basic version of DeepWalk and has some limitations:
;;
;; - It may not be efficient for very large graphs.
;; - There currently isn't any parallelization for random walk generation or training.
;;
;; --------------------------------------------------------------------------------
;; /src/model/core.clj:
;; --------------------------------------------------------------------------------
(ns model.core
  (:require [graphs.core :as gc]))


(defn sigmoid
  "Logistic function: 1 / (1 + e^-x)."
  [x]
  (/ 1 (+ 1 (Math/exp (- x)))))


(defn train-pair
  "Train on a single (word, context) pair and return the updated `vectors`.

  `vocab` maps a node to its row index in `vectors` (a vector of numeric
  vectors). The pair is treated as a positive example: the error is
  1 - sigmoid(word . context), and each of the two rows is moved along the
  other row, scaled by `learning-rate` and that error.

  Both deltas are computed from the rows as they were on entry. When `word`
  and `context` map to the same index, the two updates are applied one after
  the other to that single row."
  [word context learning-rate vocab vectors]
  (let [word-idx (get vocab word)
        context-idx (get vocab context)
        word-vec (get vectors word-idx)
        context-vec (get vectors context-idx)
        dot-product (reduce + (map * word-vec context-vec))
        error (- 1 (sigmoid dot-product))
        word-delta (mapv #(* learning-rate error %) context-vec)
        context-delta (mapv #(* learning-rate error %) word-vec)]
    (-> vectors
        (update word-idx #(mapv + % word-delta))
        (update context-idx #(mapv + % context-delta)))))


(defn get-context-words
  "Return the elements of `walk` located within `window-size` positions of
  position `idx` (clamped to the bounds of the walk), excluding the element at
  `idx` itself."
  [idx walk window-size]
  (->> (range (max 0
                   (- idx window-size))
              (min (count walk)
                   (+ idx window-size 1)))
       (remove #(= % idx))
       (map #(nth walk %))))


(defn process-walk
  "Trains the skip-gram model on one random walk.

  Takes a map with :walk, :embedding, :vocab, :window-size and :learning-rate.
  For every position in the walk, the word at that position is paired with
  each of its context words and `train-pair` is applied. Returns the updated
  embedding."
  [{:keys [walk embedding vocab window-size learning-rate]}]
  (reduce
   (fn [embedding [idx word]]
     (let [context-words (get-context-words idx walk window-size)]
       (reduce
        #(train-pair word %2 learning-rate vocab %1)
        embedding
        context-words)))
   embedding
   (map-indexed vector walk)))


(defn init-embedding
  "Creates a random embedding of size m x n where m = `vocab-size` (number of
  nodes) and n = `vector-size`.

  Returns a vector of `vocab-size` rows, each a vector of `vector-size` values
  drawn independently with `rand`.

  The earlier threaded form built one lazy seq of random numbers and handed
  that same cached seq to every call made by `repeatedly`, so every row came
  out identical. Because `train-pair` only ever adds multiples of other rows,
  identical starting rows stay collinear for the whole training run. Each row
  is now generated by its own call."
  [vector-size vocab-size]
  (let [random-row #(vec (repeatedly vector-size rand))]
    (vec (repeatedly vocab-size random-row))))


(defn train-epochs
  "Run the training loop and return the final embedding.

  For each element of `epochs`, and within it for each node of `nodes`, a
  fresh random walk of `walk-length` is generated from that node with
  `gc/random-walk` and fed to `process-walk`. The embedding is threaded
  through every step."
  [graph {:keys [epochs embedding nodes vocab window-size
                 learning-rate walk-length]}]
  (reduce
   (fn [embedding _]
     (reduce
      (fn [embedding start-node]
        (let [walk (gc/random-walk graph start-node walk-length)]
          (process-walk {:embedding embedding
                         :walk walk
                         :vocab vocab
                         :window-size window-size
                         :learning-rate learning-rate})))
      embedding
      nodes))
   embedding
   epochs))


;; Training process
(defn train-deepwalk
  "Train DeepWalk model on `graph` (a map of node -> neighbor set).

  Options (keyword arguments): :vector-size (default 5), :walk-length
  (default 10), :num-walks (default 10), :window-size (default 5) and
  :learning-rate (default 0.025).

  Returns the trained embedding: a vector of row vectors indexed by the
  integers that `gc/create-vocab` assigns to the nodes.

  NOTE(review): the README documents a :vector-size default of 64, while the
  default here is 5 -- confirm which one is intended."
  [graph & {:keys [vector-size walk-length num-walks window-size learning-rate]
            :or {vector-size 5
                 walk-length 10
                 num-walks 10
                 window-size 5
                 learning-rate 0.025}}]
  (let [vocab (gc/create-vocab graph)
        vocab-size (count vocab)
        embedding (init-embedding vector-size vocab-size)
        nodes (keys graph)
        ;; One pass over all nodes per requested walk.
        epochs (range num-walks)]
    (train-epochs graph
                  {:epochs epochs
                   :embedding embedding
                   :nodes nodes
                   :vocab vocab
                   :window-size window-size
                   :learning-rate learning-rate
                   :walk-length walk-length})))
;; --------------------------------------------------------------------------------