├── .gitignore ├── FIXME.txt ├── LICENSE.txt ├── NOTICE.txt ├── README.textile ├── lib ├── lucene-analyzers-common-4.4.0.jar ├── lucene-core-4.4.0.jar ├── mallet-2.0.7-RC2.jar ├── pygmalion-1.1.0-SNAPSHOT.jar └── trove-2.0.4.jar ├── macros ├── nlp │ └── tfidf.pig └── similarity │ └── similarity.pig ├── pom.xml ├── scripts ├── document_clustering │ ├── README.textile │ ├── check_convergence.pig │ ├── cluster_documents.pig │ ├── clusterer.sh │ ├── sample_k_centers.pig │ └── tfidf.pig └── topic_clustering │ └── discover_topics_example.pig └── src └── main └── java └── varaha ├── text ├── StanfordTokenize.java ├── StopWords.java ├── TermVector.java ├── TermVectorCentroid.java ├── TermVectorSimilarity.java └── TokenizeText.java └── topic └── LDATopics.java /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/.gitignore -------------------------------------------------------------------------------- /FIXME.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/FIXME.txt -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/NOTICE.txt -------------------------------------------------------------------------------- /README.textile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/README.textile -------------------------------------------------------------------------------- /lib/lucene-analyzers-common-4.4.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/lib/lucene-analyzers-common-4.4.0.jar -------------------------------------------------------------------------------- /lib/lucene-core-4.4.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/lib/lucene-core-4.4.0.jar -------------------------------------------------------------------------------- /lib/mallet-2.0.7-RC2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/lib/mallet-2.0.7-RC2.jar -------------------------------------------------------------------------------- /lib/pygmalion-1.1.0-SNAPSHOT.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/lib/pygmalion-1.1.0-SNAPSHOT.jar -------------------------------------------------------------------------------- /lib/trove-2.0.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/lib/trove-2.0.4.jar -------------------------------------------------------------------------------- /macros/nlp/tfidf.pig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/macros/nlp/tfidf.pig -------------------------------------------------------------------------------- /macros/similarity/similarity.pig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/macros/similarity/similarity.pig -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/pom.xml -------------------------------------------------------------------------------- /scripts/document_clustering/README.textile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/scripts/document_clustering/README.textile -------------------------------------------------------------------------------- /scripts/document_clustering/check_convergence.pig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/scripts/document_clustering/check_convergence.pig -------------------------------------------------------------------------------- /scripts/document_clustering/cluster_documents.pig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/scripts/document_clustering/cluster_documents.pig -------------------------------------------------------------------------------- /scripts/document_clustering/clusterer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/scripts/document_clustering/clusterer.sh -------------------------------------------------------------------------------- /scripts/document_clustering/sample_k_centers.pig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/scripts/document_clustering/sample_k_centers.pig -------------------------------------------------------------------------------- /scripts/document_clustering/tfidf.pig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/scripts/document_clustering/tfidf.pig -------------------------------------------------------------------------------- /scripts/topic_clustering/discover_topics_example.pig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/scripts/topic_clustering/discover_topics_example.pig -------------------------------------------------------------------------------- /src/main/java/varaha/text/StanfordTokenize.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/src/main/java/varaha/text/StanfordTokenize.java -------------------------------------------------------------------------------- /src/main/java/varaha/text/StopWords.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/src/main/java/varaha/text/StopWords.java -------------------------------------------------------------------------------- /src/main/java/varaha/text/TermVector.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/src/main/java/varaha/text/TermVector.java -------------------------------------------------------------------------------- /src/main/java/varaha/text/TermVectorCentroid.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/src/main/java/varaha/text/TermVectorCentroid.java -------------------------------------------------------------------------------- /src/main/java/varaha/text/TermVectorSimilarity.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/src/main/java/varaha/text/TermVectorSimilarity.java -------------------------------------------------------------------------------- /src/main/java/varaha/text/TokenizeText.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/src/main/java/varaha/text/TokenizeText.java -------------------------------------------------------------------------------- /src/main/java/varaha/topic/LDATopics.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alienrobotwizard/varaha/HEAD/src/main/java/varaha/topic/LDATopics.java --------------------------------------------------------------------------------