├── resources └── apple-data.txt.gz ├── test └── clojure_word2vec │ └── core_test.clj ├── project.clj ├── pom.xml.asc ├── README.md ├── src └── clojure_word2vec │ ├── examples.clj │ └── core.clj ├── pom.xml ├── doc └── intro.md └── LICENSE /resources/apple-data.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bridgei2i/clojure-word2vec/HEAD/resources/apple-data.txt.gz -------------------------------------------------------------------------------- /test/clojure_word2vec/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns clojure-word2vec.core-test 2 | (:require [clojure.test :refer :all] 3 | [clojure-word2vec.core :refer :all])) 4 | 5 | (deftest a-test 6 | (testing "FIXME, I fail." 7 | (is (= 0 1)))) 8 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject org.bridgei2i/word2vec "0.1.0" 2 | :description "A Clojure wrapper for the Medallia word2vecJava implementation" 3 | :url "https://github.com/Bridgei2i/clojure-word2vec" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.6.0"] 7 | [incanter "1.5.5"] 8 | [com.medallia.word2vec/Word2VecJava "0.9.0"] 9 | ]) 10 | -------------------------------------------------------------------------------- /pom.xml.asc: -------------------------------------------------------------------------------- 1 | -----BEGIN PGP SIGNATURE----- 2 | Version: GnuPG v1 3 | 4 | iQEcBAABAgAGBQJVE+K1AAoJEO8Ue+azRLbiSqwH/3YSbb1DUjY9B10gJIvJB+KH 5 | XzcVCcXdn0E6qWuSWTDG0g7furQR9dwU+SsKAejqdwHm3XxwnOvTCfHn+xR/8vdZ 6 | ra4CsY5kGxNwBlFMlcKW5wZ671I0GmW0gOKAyHCr+jpCRTNofBfBWjA7jZrGImbh 7 | Tz4ii+L9ztEqC6UEhp5vVrVAGrsdmLIWY3dgdNHn1EbpzElygOrkZDYOlLzweZ6A 8 | 
BKUY6s0vRzFnqEEyo8pzu7uBqYQBHusD95kQVwMVR0a4w96kq1Hl4FiZ0vj3wfKv 9 | NBeyZVFhTBTsQdQBy+KNXn0UEnOA79tFNqYAJzHB6QNBk1xQsfkBlTAz/QRdrsQ= 10 | =ZD4g 11 | -----END PGP SIGNATURE----- 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # clojure-word2vec 2 | 3 | The [word2vec tool](http://code.google.com/p/word2vec/) by Mikolov et al enables us to 4 | create word vectors from a dataset containing text data. Unlike a binary present/absent representation 5 | used by a bag-of-words, these word vectors can be used to compare 2 words and see if they are related. 6 | 7 | This is a Clojure wrapper of Java implementation of word2vec [available here] (https://github.com/medallia/Word2VecJava). 8 | 9 | ## Installation 10 | 11 | To include word2vec, add the following to your :dependencies section of project.clj 12 | 13 | [![Clojars Project](http://clojars.org/org.bridgei2i/word2vec/latest-version.svg)] 14 | 15 | ## Usage 16 | 17 | First import clojure-word2vec.core into your namespace 18 | 19 | ```clojure 20 | (ns clojure-word2vec.examples 21 | (:require [clojure-word2vec.core :refer :all] 22 | [clojure.java.io :as io])) 23 | ``` 24 | 25 | Download a text corpus and place it in the resources folder. 26 | Here we'll download James Joyce's Ulysses from [Project Gutenberg](https://www.gutenberg.org/ebooks/4300.txt.utf-8). 27 | 28 | 29 | ```clojure 30 | (def data 31 | (create-input-format "ulysses.txt")) 32 | ``` 33 | 34 | Create the model and train it, using the default hyperparameters 35 | ```clojure 36 | (def model (word2vec data)) 37 | ``` 38 | 39 | The hyper parameters can be specified as arguments to word2vec. 
40 | 41 | ```clojure 42 | (def model (word2vec data :window-size 15)) 43 | ``` 44 | 45 | Find the closest words to a given word 46 | 47 | ```clojure 48 | (get-matches model "woman") 49 | ``` 50 | 51 | ### A longer introduction is available in the [docs](https://github.com/Bridgei2i/clojure-word2vec/blob/master/doc/intro.md). 52 | 53 | ## License 54 | 55 | Copyright © 2015 Bridgei2i 56 | 57 | Distributed under the Eclipse Public License version 1.0. 58 | -------------------------------------------------------------------------------- /src/clojure_word2vec/examples.clj: -------------------------------------------------------------------------------- 1 | (ns clojure-word2vec.examples 2 | (:require [clojure-word2vec.core :refer :all] 3 | [incanter.stats :as i-stat] 4 | [clojure.edn :as edn] 5 | [clojure.java.io :as io])) 6 | 7 | 8 | (defn read-filtered-dataset 9 | [inpfile] 10 | (with-open [r (io/reader (java.util.zip.GZIPInputStream. 11 | (io/input-stream inpfile)))] 12 | (mapv edn/read-string (line-seq r)))) 13 | 14 | ;let's read the apple dataset and train a word2vec model on the data 15 | (def appvec 16 | (-> (read-filtered-dataset "resources/apple-data.txt.gz") word2vec)) 17 | 18 | ;view the top 20 words in the vocabulary 19 | (take 20 (.getVocab appvec)) 20 | 21 | ;the total number of words in the vocabulary 22 | (.getVocabSize (.toThrift appvec)) 23 | 24 | ;calculate the cosine similarity between 2 words 25 | ;this is done by fetching the raw word vectors and using 26 | ;incanter's cosine-similarity API 27 | (defn cosine-sim 28 | [model word1 word2] 29 | (let [rawvecfn #(.getRawVector (.forSearch model) %) 30 | [a1 a2] (map rawvecfn [word1 word2])] 31 | (i-stat/cosine-similarity a1 a2))) 32 | ;calculate 33 | (cosine-sim appvec "g5" "ipod") 34 | 35 | ;Some examples of the relations API 36 | ;in the original paper, the example offered was 37 | ;if Paris is related to France, Berlin is related to ? 38 | ;and the query would find Germany as the answer. 
39 | ;in the Apple dataset (as with any other dataset), 40 | ;the relationship found are usually noisy. Lets look at some 41 | ;good answers 42 | 43 | ;if nano is a 'kind of' ipod, then g3 is a 44 | (get-relations appvec "nano" "ipod" "g3") 45 | ;an ibook 46 | 47 | ;when we query for a G5 instead (a desktop computer) 48 | (get-relations appvec "nano" "ipod" "g5") 49 | ;we don't find a desktop in the top 5 answers 50 | 51 | ;if ghz is a measure of speed, then gb is a measure of 52 | (get-relations appvec "ghz" "speed" "gb") 53 | ;data, the 4th item on the list 54 | 55 | ;if 300gb is the measure of a drive, then 2ghz is a 56 | (get-relations appvec "300gb" "drive" "2ghz") 57 | ;measure of a processor (2nd item) 58 | 59 | ;sub-type of a product :airport-extreme (a wifi base station made by apple) 60 | ;what's an ipod's type 61 | (get-relations appvec "airport" "extreme" "ipod") 62 | ;nano (2nd item) 63 | 64 | ;we can use the get-matches API to return the words 65 | ;that are closest (by euclidean distance) to the argument 66 | (get-matches appvec "radeon") 67 | (get-matches appvec "seagate") 68 | (get-matches appvec "nano") 69 | (get-matches appvec "projector") 70 | (get-matches appvec "raid") 71 | (get-matches appvec "quicktime") 72 | (get-matches appvec "powermac") 73 | -------------------------------------------------------------------------------- /src/clojure_word2vec/core.clj: -------------------------------------------------------------------------------- 1 | (ns clojure-word2vec.core 2 | (:require [clojure.java.io :as io] 3 | ;[clojure.core.matrix :as mat] 4 | ;[clojure.core.matrix.operators :as matop] 5 | [incanter.stats :as i-stat] 6 | ) 7 | (:import [com.medallia.word2vec Word2VecTrainerBuilder Word2VecModel] 8 | [com.medallia.word2vec.thrift Word2VecModelThrift] 9 | [com.medallia.word2vec.neuralnetwork NeuralNetworkType] 10 | [com.medallia.word2vec.util Common])) 11 | 12 | (defn word2vec 13 | "Return a trained instance of Word2Vec, 14 | The first argument 
is a seq of seqs, where each seq is a list of words. 15 | The rest of the arguments are hyperparameters used in training the 16 | neural net. min-vocab-frequency and num-threads are always applied; the other hyperparameters are applied to the trainer only when the caller passes them explicitly." 17 | ([sentences & 18 | {:keys [ min-vocab-frequency window-size type layer-size 19 | use-negative-samples downsampling-rate num-iterations num-threads] 20 | :or { min-vocab-frequency 5 21 | window-size 8 22 | type NeuralNetworkType/CBOW 23 | layer-size 5 24 | use-negative-samples 25 25 | downsampling-rate 1e-5 26 | num-iterations 100 27 | num-threads (.availableProcessors (Runtime/getRuntime))} 28 | :as opts}] 29 | (let [bldr (doto (Word2VecModel/trainer ) 30 | (.setMinVocabFrequency min-vocab-frequency) 31 | (.useNumThreads num-threads))] ;the remaining options used to be silently ignored; each is now applied only when supplied, so default calls train as before 32 | (when (contains? opts :window-size) (.setWindowSize bldr window-size)) (when (contains? opts :type) (.type bldr type)) (when (contains? opts :layer-size) (.setLayerSize bldr layer-size)) (when (contains? opts :use-negative-samples) (.useNegativeSamples bldr use-negative-samples)) (when (contains? opts :downsampling-rate) (.setDownSamplingRate bldr downsampling-rate)) (when (contains? opts :num-iterations) (.setNumIterations bldr num-iterations)) 33 | (.train bldr sentences) ))) 34 | 35 | (defn create-input-format 36 | "Takes a text file and creates the input format required 37 | for training the word2vec model. inpfile is looked up with io/resource; an IllegalStateException is thrown when it cannot be found." 38 | [inpfile] 39 | (let [f (some-> (io/resource inpfile) io/file)] ;io/resource yields nil for a missing resource, so guard before calling .exists 40 | (if-not (and f (.exists f)) 41 | (throw (IllegalStateException. (str "Please download " inpfile " and place it in the resources folder "))) 42 | (let [data (Common/readToList f)] 43 | (->> data 44 | ;split the string and return a seq instead of Array of strings 45 | (map #(seq (.split #" " % ))) 46 | ;remove empty strings 47 | (map #(remove empty? %)) 48 | ;remove collections that are empty 49 | (remove empty?) 50 | ))))) 51 | 52 | (defn get-matches 53 | "Given a trained word2vec model and a search word, 54 | it returns 10 (default) words using a distance metric, 55 | which is Euclidean distance in this case" 56 | ([model word] (get-matches model word 10)) 57 | ([model word num-matches] 58 | (let [matches (-> (.forSearch model) 59 | (.getMatches word (inc num-matches)))] 60 | (->> matches 61 | (mapv #(.match %)) 62 | (remove #{word} ) 63 | (take num-matches))))) 64 | 65 | (defn get-relations 66 | "Given a trained word2vec model, and a relationship between word1 and word2, 67 | find the closest relationship to word3. 
68 | For example, if Paris is to France, then Berlin is to ? 69 | -Germany would a probable answer. 70 | This function returns the top 5 probable answers" 71 | [model word1 word2 word3] 72 | (let [inp-words #{word1 word2 word3} 73 | matches (-> (.forSearch model) 74 | (.similarity word1 word2) 75 | (.getMatches word3 8))] 76 | (->> matches 77 | (mapv #(.match %)) 78 | (remove inp-words) 79 | (take 5)))) 80 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | org.bridgei2i 4 | word2vec 5 | jar 6 | 0.1.0 7 | word2vec 8 | A Clojure wrapper for the Medallia word2vecJava implementation 9 | https://github.com/Bridgei2i/clojure-word2vec 10 | 11 | 12 | Eclipse Public License 13 | http://www.eclipse.org/legal/epl-v10.html 14 | 15 | 16 | 17 | scm:git:git://github.com/Bridgei2i/clojure-word2vec.git 18 | scm:git:ssh://git@github.com/Bridgei2i/clojure-word2vec.git 19 | b16344b1541ae3bb84be8037884eee095e35e08e 20 | 21 | https://github.com/Bridgei2i/clojure-word2vec 22 | 23 | 24 | src 25 | test 26 | 27 | 28 | resources 29 | 30 | 31 | 32 | 33 | dev-resources 34 | 35 | 36 | resources 37 | 38 | 39 | target 40 | target/classes 41 | 42 | 43 | 44 | 45 | central 46 | https://repo1.maven.org/maven2/ 47 | 48 | false 49 | 50 | 51 | true 52 | 53 | 54 | 55 | clojars 56 | https://clojars.org/repo/ 57 | 58 | true 59 | 60 | 61 | true 62 | 63 | 64 | 65 | 66 | 67 | org.clojure 68 | clojure 69 | 1.6.0 70 | 71 | 72 | incanter 73 | incanter 74 | 1.5.5 75 | 76 | 77 | com.medallia.word2vec 78 | Word2VecJava 79 | 0.9.0 80 | 81 | 82 | org.clojure 83 | tools.nrepl 84 | 0.2.6 85 | 86 | 87 | org.clojure 88 | clojure 89 | 90 | 91 | test 92 | 93 | 94 | clojure-complete 95 | clojure-complete 96 | 0.2.3 97 | 98 | 99 | org.clojure 100 | clojure 101 | 102 | 103 | test 104 | 105 | 106 | 107 | 108 | 112 | 
-------------------------------------------------------------------------------- /doc/intro.md: -------------------------------------------------------------------------------- 1 | 2 | # Introduction to clojure-word2vec 3 | 4 | Problem statement: 5 | When we want to classify or cluster data, the first step is to create a representation of data, usually called the Feature Vector. Datasets consisting of images or audio files have feature vectors that are already in numeric form. If we have text data, we have to convert words/characters into numbers. 6 | 7 | For a number of years, the Bag of words approach was used to create a Feature Vector. This approach required the use of a dictionary which contains all the words used in the dataset. 8 | 9 | Assume that we have a dictionary consisting of the words {"the", "sleepy","happy","cat","dog"}. If we encounter 2 sentences: "the sleepy cat" and "the happy dog", we replace the words with the index in the dictionary. Thus "the sleepy cat" becomes "0,1,3", and "the happy dog" translates to "0,2,4". 10 | 11 | However, the bag of words representation does not place similar words together (in a vector space model). 12 | Word2Vec, a tool developed by Mikolov et al., improves on this by learning a high-dimensional representation in a vector space model, which places similar words together. 13 | 14 | 15 | We'll use a dataset (consisting of forum postings on Apple products) to try out the capabilities of word2vec. 16 | We read the Apple dataset and train a word2vec model on the data. This is a modified version 17 | of the Apple dataset, which can be downloaded [here](http://times.cs.uiuc.edu/~wang296/Data/). 
18 | 19 | ```clojure 20 | (def appvec 21 | (-> (read-filtered-dataset "resources/apple-data.txt.gz") word2vec)) 22 | ``` 23 | 24 | Let's view the top 20 words in the vocabulary 25 | 26 | ```clojure 27 | (take 20 (.getVocab appvec)) 28 | ``` 29 | 30 | ``` 31 | ("ipod" "drive" "problem" "computer" "itune" "apple" "disc" 32 | "nano" "song" "thank" "mac" "os" "screen" "card" "time" 33 | "system" "music" "g5" "display" "file") 34 | ``` 35 | 36 | The total number of words in the vocabulary 37 | 38 | ```clojure 39 | (.getVocabSize (.toThrift appvec)) 40 | ``` 41 | 6813 42 | 43 | 44 | Given that word2vec transforms a word into a high dimension vector, we can 45 | compute the closeness of 2 words by comparing the corresponding word vectors 46 | using a distance metric like 47 | [cosine similarity](http://en.wikipedia.org/wiki/Cosine_similarity). 48 | 49 | To calculate the distance between 2 words, *g5* and *ipod*, 50 | we fetch the raw word vectors and compute distance using 51 | the [Incanter](http://Incanter.org) cosine-similarity API. 52 | 53 | ```clojure 54 | (defn cosine-sim 55 | [model word1 word2] 56 | (let [rawvecfn #(.getRawVector (.forSearch model) %) 57 | [a1 a2] (map rawvecfn [word1 word2])] (i-stat/cosine-similarity a1 a2))) 58 | (cosine-sim appvec "g5" "ipod") 59 | 60 | ``` 61 | The resulting score is a value between -1 and 1, where a high score indicates 62 | that the words lie close to each other in the vector space model. 63 | ``` 64 | 0.9988300697889931 65 | ``` 66 | 67 | ## Some examples of the relations API: 68 | In the original paper, the example offered was: 69 | "if Paris is related to France, Berlin is related to ?" 70 | and the query would find Germany as the answer. 71 | In the Apple dataset (as with any other dataset), 72 | the relationships found are usually noisy. 
Let's look at some 73 | good answers 74 | 75 | If Nano is a 'kind of' ipod, (The Ipod Nano was a bestselling model of the Ipod line) 76 | then [g3](http://en.wikipedia.org/wiki/Power_Macintosh_G3_%28Blue_%26_White%29) is a 77 | 78 | ```clojure 79 | (get-relations appvec "nano" "ipod" "g3") 80 | ``` 81 | ("mac" "imac" "ibook" "installation" "system") 82 | 83 | If ghz is a measure of speed, then gb is a measure of 84 | 85 | ```clojure 86 | (get-relations appvec "ghz" "speed" "gb") 87 | ``` 88 | ("data" "hd" "backup" "size" "cache") 89 | 90 | We'd expect *memory* to be the right answer, but *data*, 91 | the 1st item on the list, is a reasonable approximation. 92 | 93 | If 300gb is the measure of a drive, then 2ghz is a 94 | 95 | ```clojure 96 | (get-relations appvec "300gb" "drive" "2ghz") 97 | ``` 98 | ("imac" "system" "processor" "upgrade" "model") 99 | measure of a processor (3rd item) 100 | 101 | Airport is a product line for wifi basestations, and extreme is 102 | one of the products in that line, (a wifi base station made by apple) 103 | what's a model in the ipod line? 104 | 105 | ```clojure 106 | (get-relations appvec "airport" "extreme" "ipod") 107 | ``` 108 | ("nano" "content" "library" "music" "shuffle") 109 | The *nano* (1st item) 110 | 111 | --- 112 | There's a lot of noise in the answers however. 113 | When we query for a [G5](http://en.wikipedia.org/wiki/Power_Mac_G5) 114 | instead (a desktop computer) 115 | 116 | ```clojure 117 | (get-relations appvec "nano" "ipod" "g5") 118 | ``` 119 | ("speed" "raid" "quad" "model" "performance") 120 | 121 | we don't find a desktop in the top 5 answers. 122 | 123 | --- 124 | 125 | We can use the *get-matches* API to return the words 126 | that are closest (by euclidean distance) to the queried word. 
127 | Here are a few examples 128 | 129 | Radeon is a video card 130 | ```clojure 131 | (get-matches appvec "radeon") 132 | ``` 133 | ("card" "ati" "dual" "g5" "agp" "nvidia" "ghz" "graphic" "pcie" "model") 134 | The top few answers suggest that it is a card, and the manufacturer is ATI. 135 | 136 | ```clojure 137 | (get-matches appvec "seagate") 138 | ``` 139 | ("maxtor" "gb" "raid" "drive" "quad" "speed" "raptor" "performance" "digital" "enclosure") 140 | 141 | ```clojure 142 | (get-matches appvec "nano") 143 | ``` 144 | ("ipod" "gen" "track" "music" "shuffle" "content" "itune" "library" "play" "ipods") 145 | 146 | 147 | 148 | --- 149 | ## Conclusion 150 | 151 | * Word2vec is an excellent tool to find co-occurrences of words in a corpus. Depending on the kind of data, it may be possible to determine relationships as well. 152 | * We used the Apple dataset as it has content that was annotated with Part of Speech tags (such as nouns, verbs). For this exercise, we only used words that were nouns or were part of noun phrases. 153 | * Word2vec can tell us what is being discussed *about* something. We can see from the *get-matches* API that customers talking about Seagate (a hard disk manufacturer) are concerned about sizes, speed, performance and enclosures. 154 | 155 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. 
DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 
45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. 
REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. 
While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor tocontrol, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. 
Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. 
GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. 
The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | --------------------------------------------------------------------------------